Skip to content

Commit

Permalink
runs
Browse files (browse the repository at this point in the history)
  • Loading branch information
natolambert committed Jul 22, 2024
1 parent 3d8c916 commit c164735
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions open_instruct/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def instruction_output_to_messages(example):
example["messages"] = messages
return example


def query_answer_to_messages(example):
"""
Convert a query-answer pair to a list of messages.
Expand All @@ -78,6 +79,7 @@ def query_answer_to_messages(example):
example["messages"] = messages
return example


def query_response_to_messages(example):
"""
Convert a query-response pair to a list of messages.
Expand All @@ -89,6 +91,7 @@ def query_response_to_messages(example):
example["messages"] = messages
return example


def prompt_completion_to_messages(example):
"""
Convert a prompt-completion pair to a list of messages.
Expand Down Expand Up @@ -240,9 +243,11 @@ def get_datasets(
dataset = dataset.add_column("id", id_col)

# Remove redundant columns to avoid schema conflicts on load
dataset = dataset.remove_columns([col for col in dataset.column_names if col not in (columns_to_keep+["id"])])
dataset = dataset.remove_columns(
[col for col in dataset.column_names if col not in (columns_to_keep + ["id"])]
)

# add tag to the dataset corresponding to where it was sourced from, for
# add tag to the dataset corresponding to where it was sourced from, for
if "train" in split:
raw_train_datasets.append(dataset)
elif "test" in split:
Expand Down

0 comments on commit c164735

Please sign in to comment.