|
import os |
|
import json |
|
import collections |
|
|
|
def read_qs(directory="./dialogues_set", filenames=None):
    """Load queries from line-delimited JSON dialogue files.

    Every non-blank line of each file is parsed as a JSON object and the
    value stored under the line's zero-based index (as a string key) is
    collected, i.e. line 0 is expected to look like ``{"0": <query>}``.

    Args:
        directory: Folder that holds the dialogue files. Defaults to
            ``"./dialogues_set"``.
        filenames: Iterable of file names inside ``directory`` to read, in
            order. When ``None``, the six default dialogue files are used.

    Returns:
        A list of the queries, in file order and then line order.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a line's object has no key equal to its line index.
    """
    if filenames is None:
        filenames = (
            'dialogues_film.json',
            'dialogues_jindong.json',
            'dialogues_music.json',
            'dialogues_natural.json',
            'dialogues_taobao.json',
            'dialogues_travel_kd.json',
        )

    qs = []
    for filename in filenames:
        path = os.path.join(directory, filename)
        with open(path, "r", encoding="utf-8") as f:
            for idx, line in enumerate(f):
                # A blank line (typically a trailing newline at end of file)
                # is not valid JSON; skip it instead of crashing. The index
                # still counts raw lines, so previously valid files resolve
                # to exactly the same keys as before.
                if not line.strip():
                    continue
                idx2query = json.loads(line)
                qs.append(idx2query[str(idx)])
    print(f"read {len(qs)} queries from files")
    return qs
|
|
|
def read_qas(directory="./dialogues_set"):
    """Load query-answer records from every ``*qas*.json`` file in a folder.

    A file is read when its name ends with ``.json`` and contains the
    substring ``"qas"``. Each such file is parsed as a single JSON document
    and its elements are appended to the result.

    NOTE(review): the script entry point of this file writes its result to
    ``./dialogues_set/qas.json``, a name that also matches this filter, so a
    second run will read the previous output back in. Confirm whether that
    is intended.

    Args:
        directory: Folder to scan. Defaults to ``"./dialogues_set"``.

    Returns:
        A list with the elements of all matching files, concatenated in
        sorted file-name order.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a matching file is not valid JSON.
    """
    qas = []
    # os.listdir() returns entries in arbitrary order; sort them so the
    # sequence of records (and anything derived from it) is reproducible.
    for filename in sorted(os.listdir(directory)):
        if not (filename.endswith(".json") and "qas" in filename):
            continue
        with open(os.path.join(directory, filename), "r", encoding="utf-8") as f:
            qas.extend(json.load(f))
    print(f"read {len(qas)} query-answers from files")
    return qas
|
|
|
def merge(qs, qas):
    """Pair each query in ``qs`` with one answer taken from ``qas``.

    The answers in ``qas`` are grouped by their ``"q"`` value. Walking ``qs``
    in order, every query that still has an unused answer consumes the most
    recently added one; queries with no remaining answer are dropped.

    Args:
        qs: Iterable of queries (must be hashable).
        qas: Iterable of mappings with ``"q"`` and ``"a"`` keys.

    Returns:
        A list of ``{"q": query, "a": answer}`` dicts in the order of ``qs``.
    """
    answers_by_query = collections.defaultdict(list)
    for pair in qas:
        answers_by_query[pair["q"]].append(pair["a"])

    merged = []
    for query in qs:
        pending = answers_by_query[query]
        if pending:
            # pop() takes from the end, so later answers are used first.
            merged.append({"q": query, "a": pending.pop()})
    print(f"merge {len(merged)} query-answers from files")
    return merged
|
|
|
|
|
if __name__ == "__main__":
    # Load the raw queries and the candidate query/answer records, then keep
    # one answer per query occurrence.
    queries = read_qs()
    candidate_pairs = read_qas()
    merged_pairs = merge(queries, candidate_pairs)

    # NOTE(review): this output name ends with ".json" and contains "qas",
    # so read_qas() will also pick it up on a subsequent run.
    with open("./dialogues_set/qas.json", "w", encoding="utf-8") as out:
        # ensure_ascii=False keeps non-ASCII text readable in the output file.
        serialized = json.dumps(merged_pairs, ensure_ascii=False, indent=2)
        out.write(serialized)