# File: legal-entity-ner-transformers/source/services/ner/awscomprehend_2_ner_format.py
# Page metadata: aimlnerd · commit "add" · 0806367 · 1.01 kB
import json
import pandas as pd
# Default input: the single annotation file this script was written against.
ANNOTATION_PATH = r"data/raw_data/annotations/Letter 0-1-ccf1b225-ann.json"


def load_annotations(path=ANNOTATION_PATH):
    """Load an annotation JSON document from *path*.

    Args:
        path: Location of the annotation file. Defaults to ``ANNOTATION_PATH``.

    Returns:
        The parsed JSON document (a dict with at least ``'Blocks'`` and
        ``'Entities'`` keys is what the extractors below expect).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale default encoding.
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)


def extract_tokens(annotations):
    """Return one row per WORD block: its id and its text.

    Args:
        annotations: Parsed annotation document; every entry of
            ``annotations['Blocks']`` must carry ``'BlockType'``, and WORD
            entries must also carry ``'Id'`` and ``'Text'``.

    Returns:
        DataFrame with columns ``['blockid', 'token']`` in document order
        (empty, but with those columns, when there are no WORD blocks).

    Raises:
        KeyError: If an expected key is missing.
    """
    rows = [
        {'blockid': block['Id'], 'token': block['Text']}
        for block in annotations['Blocks']
        if block["BlockType"] == 'WORD'
    ]
    return pd.DataFrame(rows, columns=['blockid', 'token'])


def extract_ner_tags(annotations):
    """Return one row per entity child block with a ``B-``/``I-`` prefixed tag.

    Within each block reference the first child block is tagged
    ``B-<Type>`` and every following child block ``I-<Type>``.

    Args:
        annotations: Parsed annotation document; every entry of
            ``annotations['Entities']`` must carry ``'Type'`` and
            ``'BlockReferences'``, each reference a ``'ChildBlocks'`` list
            whose items carry ``'ChildBlockId'``.

    Returns:
        DataFrame with columns ``['blockid', 'ner_tag']``.

    Raises:
        KeyError: If an expected key is missing.
    """
    rows = []
    for entity in annotations['Entities']:
        ner_tag = entity['Type']
        for reference in entity['BlockReferences']:
            # NOTE(review): numbering restarts for every block reference, so
            # an entity with several references gets several "B-" tags.
            # Presumably each reference is one text line of the entity;
            # confirm whether continuation references should start with "I-".
            for position, child in enumerate(reference['ChildBlocks']):
                prefix = "B" if position == 0 else "I"
                rows.append({
                    'blockid': child['ChildBlockId'],
                    'ner_tag': f"{prefix}-{ner_tag}",
                })
    return pd.DataFrame(rows, columns=['blockid', 'ner_tag'])


def merge_tokens_with_tags(df_token, df_nertags):
    """Left-join the NER tags onto the tokens by ``blockid``.

    Every token row is kept; tokens that belong to no entity end up with a
    missing (NaN) ``ner_tag``.
    NOTE(review): downstream NER tooling may expect an explicit "O" tag
    instead of NaN; confirm with the consumer before filling.

    Args:
        df_token: Frame with columns ``['blockid', 'token']``.
        df_nertags: Frame with columns ``['blockid', 'ner_tag']``.

    Returns:
        DataFrame with columns ``['blockid', 'token', 'ner_tag']``.
    """
    return pd.merge(df_token, df_nertags, on='blockid', how='left')


if __name__ == "__main__":
    # Script entry point. The intermediate results keep their module-level
    # names so an interactive session sees the same globals.
    json_letter = load_annotations()
    df_token = extract_tokens(json_letter)
    df_nertags = extract_ner_tags(json_letter)
    df = merge_tokens_with_tags(df_token, df_nertags)
    # Show the result; a bare print() would only emit an empty line.
    print(df)