from collections import Counter
import json

from tqdm import tqdm
def count_labels(lines, skip_label="--NME--"):
    """Count how often each annotation label occurs in JSONL records.

    Args:
        lines: Iterable of strings, each holding one JSON object with a
            "doc_annotations" list of 3-item entries whose last item is
            the label. The first two items are ignored here (presumably
            the span start/end offsets -- confirm against the data format).
        skip_label: Label that is excluded from the counts.

    Returns:
        A ``Counter`` mapping each remaining label to its occurrence count.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no "doc_annotations" key.
    """
    counter = Counter()
    for line in lines:
        # Tolerate blank lines (e.g. a trailing newline at end of file)
        # instead of failing inside json.loads.
        if not line.strip():
            continue
        sample = json.loads(line)
        for _start, _end, label in sample["doc_annotations"]:
            if label == skip_label:
                continue
            counter[label] += 1
    return counter


if __name__ == "__main__":
    # Explicit UTF-8 so the result does not depend on the platform's
    # default locale encoding (labels may contain non-ASCII characters).
    with open(
        "/media/data/EL/blink/train.alby-format.jsonl", encoding="utf-8"
    ) as f_in:
        counter = count_labels(tqdm(f_in))

    # One "label<TAB>count" row per label, most frequent first.
    with open("frequency_blink.txt", "w", encoding="utf-8") as f_out:
        for k, v in counter.most_common():
            f_out.write(f"{k}\t{v}\n")