M96820
feat: finetune on synthetic data
1d27c1f unverified
raw
history blame
1.09 kB
import json
def transform_data(data):
# Create a new dictionary with the transformed data
final_data = []
transformed_0 = {}
# Transform user_prompt to user and append the required text
user_text = data["user_prompt"] + " Generate the next story segment and choices."
transformed_0["content"] = user_text
transformed_0["role"] = "user"
# Transform answer to assistant
try:
transformed_1 = {}
transformed_1["content"] = "choices: " + " | ".join(data["answer"])
transformed_1["role"] = "assistant"
final_data.append(transformed_0)
final_data.append(transformed_1)
except:
return None
return final_data
# Example usage
input_data = [json.loads(line) for line in open('synthetic_data.jsonl')]
transformed_data = [transform_data(item) for item in input_data]
print(json.dumps(transformed_data, indent=2))
# Filter out None values and write to JSONL file
with open('transformed_data.jsonl', 'w') as f:
for item in filter(None, transformed_data):
f.write(json.dumps(item) + '\n')