File size: 1,009 Bytes
0806367
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import json
import pandas as pd
# Load the annotation export for one letter into a plain dict.
# The encoding is pinned to UTF-8 (the standard JSON interchange encoding) so
# parsing does not depend on the platform's locale default.
with open(
    r"data/raw_data/annotations/Letter 0-1-ccf1b225-ann.json",
    "r",
    encoding="utf-8",
) as file:
    json_letter = json.load(file)



# Keep only the WORD blocks, recording each one's id and text; every other
# block type is skipped.
data_token = [
    {'blockid': entry['Id'], 'token': entry['Text']}
    for entry in json_letter['Blocks']
    if entry["BlockType"] == 'WORD'
]

# One row per word. The columns are passed explicitly so the frame has both
# columns even when no WORD block was found.
df_token = pd.DataFrame(data_token, columns=['blockid', 'token'])

# Build one BIO-style tag per child block referenced by an entity: the first
# child of an entity gets "B-<Type>", every following child gets "I-<Type>".
data_nertags = []
for entity in json_letter['Entities']:
    ner_tag = entity['Type']
    # The begin marker is tracked per entity, not per block reference, so an
    # entity with several references still gets exactly one "B-" tag.
    # NOTE(review): assumes the references and their children are listed in
    # reading order -- confirm against the annotation export format.
    prefix = 'B'
    for subref in entity['BlockReferences']:
        for child in subref['ChildBlocks']:
            data_nertags.append(
                {'blockid': child['ChildBlockId'], 'ner_tag': f"{prefix}-{ner_tag}"}
            )
            prefix = 'I'

# The columns are passed explicitly so the frame has both columns even when
# there are no entities.
df_nertags = pd.DataFrame(data_nertags, columns=['blockid', 'ner_tag'])

# Attach the tag (if any) to every token. The left join keeps all tokens;
# tokens that no entity references end up with a missing ner_tag.
df = df_token.merge(df_nertags, how='left', on='blockid')

# NOTE(review): this prints only an empty line; presumably a leftover
# debugging anchor or meant to print df -- confirm the intent.
print()