Spaces:
Runtime error
Runtime error
rename methods
Browse files
source/services/ner/awscomprehend_2_ner_format.py
CHANGED
@@ -14,7 +14,7 @@ class Comprehend2NERFormat:
|
|
14 |
return json_letter
|
15 |
|
16 |
@staticmethod
|
17 |
-
def
|
18 |
data_token = []
|
19 |
for block in jsondata['Blocks']:
|
20 |
if block["BlockType"] == 'WORD':
|
@@ -23,7 +23,7 @@ class Comprehend2NERFormat:
|
|
23 |
return df_token
|
24 |
|
25 |
@staticmethod
|
26 |
-
def
|
27 |
df_line = pd.DataFrame(columns = ['lineid', 'childid'])
|
28 |
for block in jsondata['Blocks']:
|
29 |
if block["BlockType"] == 'LINE':
|
@@ -33,7 +33,7 @@ class Comprehend2NERFormat:
|
|
33 |
return df_line
|
34 |
|
35 |
@staticmethod
|
36 |
-
def
|
37 |
data_nertags = []
|
38 |
for block in jsondata['Entities']:
|
39 |
ner_tag = block['Type']
|
@@ -50,7 +50,7 @@ class Comprehend2NERFormat:
|
|
50 |
return df_nertags
|
51 |
|
52 |
@staticmethod
|
53 |
-
def
|
54 |
df = pd.DataFrame()
|
55 |
df_insert = pd.DataFrame({'blockid' : 'newline', 'token' : '\n', 'ner_tag': 'O','lineid': 'newline'},index=[0])
|
56 |
for group in df_prev.groupby('lineid'):
|
@@ -62,16 +62,16 @@ class Comprehend2NERFormat:
|
|
62 |
|
63 |
def __call__(self):
|
64 |
json_letter = self.load_data()
|
65 |
-
df_token = self.
|
66 |
-
df_line = self.
|
67 |
-
df_nertags = self.
|
68 |
|
69 |
df1 = pd.merge(df_token, df_nertags, on='blockid', how='left')
|
70 |
df1['ner_tag'][df1['ner_tag'].isna()] = 'O'
|
71 |
df2 = pd.merge(df1, df_line, left_on='blockid', right_on='childid', how='left').drop(columns=['childid'])
|
72 |
df2['linewordrank'] = np.arange(df2.shape[0])
|
73 |
|
74 |
-
df3 = self.
|
75 |
return {"tokens": df3['token'].tolist(),
|
76 |
"ner_tags": df3['ner_tag'].tolist(),
|
77 |
"filename": self.letterfilepath.name
|
|
|
14 |
return json_letter
|
15 |
|
16 |
@staticmethod
|
17 |
+
def _get_tokens(jsondata):
|
18 |
data_token = []
|
19 |
for block in jsondata['Blocks']:
|
20 |
if block["BlockType"] == 'WORD':
|
|
|
23 |
return df_token
|
24 |
|
25 |
@staticmethod
|
26 |
+
def _get_line_child_ids(jsondata):
|
27 |
df_line = pd.DataFrame(columns = ['lineid', 'childid'])
|
28 |
for block in jsondata['Blocks']:
|
29 |
if block["BlockType"] == 'LINE':
|
|
|
33 |
return df_line
|
34 |
|
35 |
@staticmethod
|
36 |
+
def _get_ner_tags(jsondata):
|
37 |
data_nertags = []
|
38 |
for block in jsondata['Entities']:
|
39 |
ner_tag = block['Type']
|
|
|
50 |
return df_nertags
|
51 |
|
52 |
@staticmethod
|
53 |
+
def _insert_newline_char(df_prev):
|
54 |
df = pd.DataFrame()
|
55 |
df_insert = pd.DataFrame({'blockid' : 'newline', 'token' : '\n', 'ner_tag': 'O','lineid': 'newline'},index=[0])
|
56 |
for group in df_prev.groupby('lineid'):
|
|
|
62 |
|
63 |
def __call__(self):
|
64 |
json_letter = self.load_data()
|
65 |
+
df_token = self._get_tokens(jsondata=json_letter)
|
66 |
+
df_line = self._get_line_child_ids(jsondata=json_letter)
|
67 |
+
df_nertags = self._get_ner_tags(jsondata=json_letter)
|
68 |
|
69 |
df1 = pd.merge(df_token, df_nertags, on='blockid', how='left')
|
70 |
df1['ner_tag'][df1['ner_tag'].isna()] = 'O'
|
71 |
df2 = pd.merge(df1, df_line, left_on='blockid', right_on='childid', how='left').drop(columns=['childid'])
|
72 |
df2['linewordrank'] = np.arange(df2.shape[0])
|
73 |
|
74 |
+
df3 = self._insert_newline_char(df_prev=df2)
|
75 |
return {"tokens": df3['token'].tolist(),
|
76 |
"ner_tags": df3['ner_tag'].tolist(),
|
77 |
"filename": self.letterfilepath.name
|