import json | |
def transform_data(data): | |
# Create a new dictionary with the transformed data | |
final_data = [] | |
transformed_0 = {} | |
# Transform user_prompt to user and append the required text | |
user_text = data["user_prompt"] + " Generate the next story segment and choices." | |
transformed_0["content"] = user_text | |
transformed_0["role"] = "user" | |
# Transform answer to assistant | |
try: | |
transformed_1 = {} | |
transformed_1["content"] = "choices: " + " | ".join(data["answer"]) | |
transformed_1["role"] = "assistant" | |
final_data.append(transformed_0) | |
final_data.append(transformed_1) | |
except: | |
return None | |
return final_data | |
# Example usage | |
input_data = [json.loads(line) for line in open('synthetic_data.jsonl')] | |
transformed_data = [transform_data(item) for item in input_data] | |
print(json.dumps(transformed_data, indent=2)) | |
# Filter out None values and write to JSONL file | |
with open('transformed_data.jsonl', 'w') as f: | |
for item in filter(None, transformed_data): | |
f.write(json.dumps(item) + '\n') |