---
{}
---
|
```python
|
# Bridging sentence inserted between the passage (title + content) and the
# user/assistant exchange when a training prompt is assembled.
pre_text = "The following is an interaction between a user and an AI assistant that is related to the above text."
|
def ds_map_fn(row):
    """Convert one QA record into token ids with prompt-masked labels.

    The prompt is the row's title and context, followed by ``pre_text`` and
    the opening ``[[[User]]]`` tag. The completion is the question, the
    ``[[[Assistant]]]`` tag and the answer. The two parts are tokenized
    separately with ``add_special_tokens=False`` and a single EOS id is
    appended at the end.

    Relies on the module-level names ``tokenizer`` and ``pre_text``.

    Args:
        row: Mapping with string fields ``title``, ``context``, ``question``
            and ``answer``; each is stripped of surrounding whitespace.

    Returns:
        A dict with two equal-length lists:
        ``input_ids`` -- prompt ids + completion ids + EOS id;
        ``labels`` -- ``-100`` for every prompt position, then the
        completion ids and the EOS id.
    """
    prompt_text = (
        f"[[[Title]]] {row['title'].strip()}\n[[[Content]]] {row['context'].strip()}\n\n"
        + pre_text
        + "\n\n[[[User]]] "
    )
    # The question is part of the completion, so it is supervised together
    # with the answer rather than being masked out with the prompt.
    completion_text = f"{row['question'].strip()}\n[[[Assistant]]] {row['answer'].strip()}"

    prompt_ids = tokenizer.encode(prompt_text, add_special_tokens=False)
    completion_ids = tokenizer.encode(completion_text, add_special_tokens=False)

    # Supervised span: completion tokens terminated by EOS.
    target_ids = completion_ids + [tokenizer.eos_token_id]

    return {
        'input_ids': prompt_ids + target_ids,
        # -100 is the default ignore index of PyTorch's cross-entropy loss;
        # presumably the trainer uses it to skip the prompt positions.
        'labels': [-100] * len(prompt_ids) + target_ids,
    }
|
# Apply ds_map_fn to every record and drop all of the original columns, so
# the result keeps only the fields that ds_map_fn returns.
# NOTE(review): assumes `ds` is a Hugging Face `datasets.Dataset` -- confirm.
ds = ds.map(ds_map_fn, remove_columns=ds.column_names)
|
```