govsearch / scripts /convert-csv-to-json.py
Katsuya Oda
Initial commit
5285b72 unverified
raw
history blame
622 Bytes
#!/usr/bin/env python
import sys
import pandas as pd
# Output field name -> CSV column header it is copied from.
# The Japanese headers are: 大分類 (major category), 中分類 (medium category),
# 小分類 (minor category), 問い (question), 回答 (answer).
FIELD_COLUMNS = {
    "doc_id": "ID",
    "category_major": "大分類",
    "category_medium": "中分類",
    "category_minor": "小分類",
    "question": "問い",
    "answer": "回答",
}

# Prefix prepended to every row's ID to form the document "id" value.
DOC_ID_PREFIX = "id:govsearch:qa::"


def convert_csv_to_jsonl(source):
    """Convert a Q&A CSV into a JSON-lines string of ``{"id", "fields"}`` records.

    Rows whose ``ID`` cell is missing are dropped. Every remaining row becomes
    one JSON object per line with:

    * ``id``     -- ``DOC_ID_PREFIX`` followed by the row's ``ID`` value
    * ``fields`` -- a dict built from ``FIELD_COLUMNS``

    Args:
        source: Anything ``pandas.read_csv`` accepts (a path or a file-like
            object).

    Returns:
        The JSON-lines text, non-ASCII characters left unescaped.

    Raises:
        KeyError: If the CSV lacks one of the columns in ``FIELD_COLUMNS``.
    """
    df = pd.read_csv(source)

    # Copy after filtering so the column assignments below work on an
    # independent frame rather than on a slice of the frame read from disk.
    df = df[df["ID"].notna()].copy()
    assert isinstance(df, pd.DataFrame), "Narrowing down the type of df"

    # NOTE(review): if ID is numeric and some rows have it blank, pandas reads
    # the column as float, so ids would render like "...::1.0" -- confirm the
    # ID format of the real input.
    df["id"] = df["ID"].apply(lambda x: f"{DOC_ID_PREFIX}{x}")
    df["fields"] = df.apply(
        lambda row: {
            field: row[column] for field, column in FIELD_COLUMNS.items()
        },
        axis=1,
    )
    return df[["id", "fields"]].to_json(
        orient="records", force_ascii=False, lines=True
    )


def main(argv=None):
    """Read the CSV named by the first CLI argument and print JSON lines."""
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        # A usage message is more helpful than an IndexError traceback.
        sys.exit(f"usage: {args[0] if args else 'convert-csv-to-json.py'} CSV_FILE")
    print(convert_csv_to_jsonl(args[1]))


if __name__ == "__main__":
    main()