"""Tag a text corpus with spaCy and export POS-selected token indices as JSONL."""

import json
import os

import spacy

# Values of spaCy's ``Token.pos_`` that cause a token index to be listed
# under "predicates" in the output.
TARGET_POS = [
    "NOUN",
    "VERB",
    "ADJ",
    "ADV",
]

DEFAULT_MODEL = "it_core_news_md"
DEFAULT_INPUT_PATH = "data/migration/corpus_titoli_all_raw.txt"
DEFAULT_OUTPUT_PATH = (
    "output/migration/pos_based_targetid/corpus_titoli_all_raw.jsonl"
)


def do_frameid(
    input_path: str = DEFAULT_INPUT_PATH,
    output_path: str = DEFAULT_OUTPUT_PATH,
    model: str = DEFAULT_MODEL,
) -> None:
    """Write one JSON record per input line with its tokens and target indices.

    Each line of ``input_path`` is stripped of surrounding whitespace and
    run through the spaCy pipeline ``model``. The resulting record has two
    keys:

    * ``"tokens"``: the text of every token in the line;
    * ``"predicates"``: the 0-based indices of the tokens whose ``pos_``
      is one of ``TARGET_POS``.

    Blank input lines still produce a record (with two empty lists), so
    output line *k* always corresponds to input line *k*.

    Args:
        input_path: UTF-8 text file to read, one text per line.
        output_path: JSONL file to create or overwrite. Its parent
            directory is created when it does not exist yet.
        model: Name of the spaCy pipeline passed to ``spacy.load``.
    """
    nlp = spacy.load(model)

    # Create the destination directory up front so a fresh checkout does not
    # fail with FileNotFoundError when the output file is opened.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(input_path, encoding="utf-8") as f_in, \
            open(output_path, "w", encoding="utf-8") as f_out:
        for line in f_in:
            doc = nlp(line.strip())
            out = {
                "tokens": [t.text for t in doc],
                "predicates": [
                    i for i, t in enumerate(doc) if t.pos_ in TARGET_POS
                ],
            }
            # Use "\n", not os.linesep: the file is open in text mode, which
            # already translates "\n" to the platform line ending. Writing
            # os.linesep would yield "\r\r\n" on Windows.
            f_out.write(json.dumps(out) + "\n")


if __name__ == "__main__":
    do_frameid()