"""Merge dialogue queries with their answers into a single JSON file.

Queries are read from a fixed list of line-delimited JSON files, answers are
read from every ``*qas*.json`` file in the same directory, and the matched
pairs are written to ``qas.json`` in that directory.
"""
import collections
import json
import os

# Directory holding both the input data sets and the generated output file.
DIALOGUE_DIR = "./dialogues_set"

# Query files, read in this order; the order determines the output order.
QUERY_FILENAMES = (
    "dialogues_film.json",
    "dialogues_jindong.json",
    "dialogues_music.json",
    "dialogues_natural.json",
    "dialogues_taobao.json",
    "dialogues_travel_kd.json",
)

# Name of the merged file written by ``main``.  It matches the pattern that
# ``read_qas`` scans for, so it has to be excluded from the inputs.
OUTPUT_FILENAME = "qas.json"


def read_qs(directory=DIALOGUE_DIR, filenames=QUERY_FILENAMES):
    """Read all queries from the line-delimited query files.

    Every line of every file must be a JSON object that contains the line's
    zero-based index within that file (as a string) as a key; the value stored
    under that key is taken as the query.

    Args:
        directory: Directory containing the query files.
        filenames: Names of the files to read, in order.

    Returns:
        A list with one query per input line, in file order.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a line is not valid JSON.
        KeyError: If a line's object lacks the key for its line index.
    """
    qs = []
    for filename in filenames:
        path = os.path.join(directory, filename)
        with open(path, "r", encoding="utf-8") as f:
            for idx, line in enumerate(f):
                idx2query = json.loads(line)
                qs.append(idx2query[str(idx)])
    print(f"read {len(qs)} queries from files")
    return qs


def read_qas(directory=DIALOGUE_DIR, exclude=(OUTPUT_FILENAME,)):
    """Read query-answer records from every ``*qas*.json`` file in a directory.

    A file is read when its name ends with ``.json`` and contains ``"qas"``.
    Each such file must hold a JSON array; its elements are collected as-is.

    Args:
        directory: Directory to scan.
        exclude: File names to skip.  Defaults to the merged output file,
            which otherwise would be re-read as input on the next run.

    Returns:
        A list of all records, files visited in sorted name order.
    """
    qas = []
    # os.listdir returns names in arbitrary order; sorting keeps the result
    # (and therefore which answer ``merge`` picks) reproducible.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith(".json") or "qas" not in filename:
            continue
        if filename in exclude:
            continue
        path = os.path.join(directory, filename)
        with open(path, "r", encoding="utf-8") as f:
            qas.extend(json.load(f))
    print(f"read {len(qas)} query-answers from files")
    return qas


def merge(qs, qas):
    """Pair each query with one of the answers recorded for it.

    Args:
        qs: Queries, in the order the output should follow.
        qas: Records with a ``"q"`` (query) and an ``"a"`` (answer) key.

    Returns:
        A list of ``{"q": ..., "a": ...}`` dicts, one per query that still has
        an unused answer.  Queries without a remaining answer are dropped.
    """
    q_to_as = collections.defaultdict(list)
    for qa in qas:
        q_to_as[qa["q"]].append(qa["a"])

    merged = []
    for q in qs:
        # .get() avoids inserting an empty list for every unanswered query.
        answers = q_to_as.get(q)
        if not answers:
            continue
        # Each answer is used at most once; the most recently read answer for
        # a query is consumed first.
        merged.append({"q": q, "a": answers.pop()})
    print(f"merge {len(merged)} query-answers from files")
    return merged


def main():
    """Read queries and answers, merge them and write the result as JSON."""
    qs = read_qs()
    qas = read_qas()
    qas = merge(qs, qas)
    with open(os.path.join(DIALOGUE_DIR, OUTPUT_FILENAME), "w", encoding="utf-8") as f:
        f.write(json.dumps(qas, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()