nanoalpaca-3b / convert-dataset.py
Sovenok-Hacker's picture
Add dataset converter and already converted dataset
c39278f
import json
# Default locations, resolved relative to the current working directory.
SOURCE_PATH = 'databricks-dolly-15k.jsonl'
OUTPUT_PATH = 'data.json'

# Extra hand-written record appended after all converted records.
IDENTITY_EXAMPLE = {
    "instruction": "What is your name?",
    "input": "",
    "output": "My name is LibreAlpaca.",
}


def _read_jsonl(path):
    """Return the JSON objects stored one-per-line in the file at *path*."""
    records = []
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(path, encoding="utf-8") as file:
        # Iterate the file handle instead of ``read().splitlines()``:
        # ``str.splitlines`` also breaks on U+2028/U+2029/U+0085, which may
        # appear unescaped inside a JSON string and would cut a record in two.
        for line in file:
            if line.strip():  # tolerate blank lines (e.g. a trailing one)
                records.append(json.loads(line))
    return records


def _to_alpaca(records):
    """Map instruction/context/response records to instruction/input/output.

    Raises:
        KeyError: if a record lacks "instruction", "context" or "response".
    """
    converted = [
        {
            "instruction": ex["instruction"],
            "input": ex["context"],
            "output": ex["response"],
        }
        for ex in records
    ]
    # Copy so callers mutating the result cannot alter the module constant.
    converted.append(dict(IDENTITY_EXAMPLE))
    return converted


def main(source_path=SOURCE_PATH, output_path=OUTPUT_PATH):
    """Convert the JSONL file at *source_path* into one JSON array file.

    Args:
        source_path: JSON Lines input; each non-blank line is one object.
        output_path: destination file, overwritten with a single JSON list.
    """
    ds = _to_alpaca(_read_jsonl(source_path))
    with open(output_path, 'w', encoding="utf-8") as file:
        # Default ``ensure_ascii=True`` is kept so the output stays pure ASCII.
        json.dump(ds, file)


if __name__ == "__main__":
    main()