aimlnerd committed on
Commit
f432830
1 Parent(s): a118223

Rename helper methods to private (underscore-prefixed) names

Browse files
source/services/ner/awscomprehend_2_ner_format.py CHANGED
@@ -14,7 +14,7 @@ class Comprehend2NERFormat:
14
  return json_letter
15
 
16
  @staticmethod
17
- def get_tokens(jsondata):
18
  data_token = []
19
  for block in jsondata['Blocks']:
20
  if block["BlockType"] == 'WORD':
@@ -23,7 +23,7 @@ class Comprehend2NERFormat:
23
  return df_token
24
 
25
  @staticmethod
26
- def get_line_child_ids(jsondata):
27
  df_line = pd.DataFrame(columns = ['lineid', 'childid'])
28
  for block in jsondata['Blocks']:
29
  if block["BlockType"] == 'LINE':
@@ -33,7 +33,7 @@ class Comprehend2NERFormat:
33
  return df_line
34
 
35
  @staticmethod
36
- def get_ner_tags(jsondata):
37
  data_nertags = []
38
  for block in jsondata['Entities']:
39
  ner_tag = block['Type']
@@ -50,7 +50,7 @@ class Comprehend2NERFormat:
50
  return df_nertags
51
 
52
  @staticmethod
53
- def insert_newline_char(df_prev):
54
  df = pd.DataFrame()
55
  df_insert = pd.DataFrame({'blockid' : 'newline', 'token' : '\n', 'ner_tag': 'O','lineid': 'newline'},index=[0])
56
  for group in df_prev.groupby('lineid'):
@@ -62,16 +62,16 @@ class Comprehend2NERFormat:
62
 
63
  def __call__(self):
64
  json_letter = self.load_data()
65
- df_token = self.get_tokens(jsondata=json_letter)
66
- df_line = self.get_line_child_ids(jsondata=json_letter)
67
- df_nertags = self.get_ner_tags(jsondata=json_letter)
68
 
69
  df1 = pd.merge(df_token, df_nertags, on='blockid', how='left')
70
  df1['ner_tag'][df1['ner_tag'].isna()] = 'O'
71
  df2 = pd.merge(df1, df_line, left_on='blockid', right_on='childid', how='left').drop(columns=['childid'])
72
  df2['linewordrank'] = np.arange(df2.shape[0])
73
 
74
- df3 = self.insert_newline_char(df_prev=df2)
75
  return {"tokens": df3['token'].tolist(),
76
  "ner_tags": df3['ner_tag'].tolist(),
77
  "filename": self.letterfilepath.name
 
14
  return json_letter
15
 
16
  @staticmethod
17
+ def _get_tokens(jsondata):
18
  data_token = []
19
  for block in jsondata['Blocks']:
20
  if block["BlockType"] == 'WORD':
 
23
  return df_token
24
 
25
  @staticmethod
26
+ def _get_line_child_ids(jsondata):
27
  df_line = pd.DataFrame(columns = ['lineid', 'childid'])
28
  for block in jsondata['Blocks']:
29
  if block["BlockType"] == 'LINE':
 
33
  return df_line
34
 
35
  @staticmethod
36
+ def _get_ner_tags(jsondata):
37
  data_nertags = []
38
  for block in jsondata['Entities']:
39
  ner_tag = block['Type']
 
50
  return df_nertags
51
 
52
  @staticmethod
53
+ def _insert_newline_char(df_prev):
54
  df = pd.DataFrame()
55
  df_insert = pd.DataFrame({'blockid' : 'newline', 'token' : '\n', 'ner_tag': 'O','lineid': 'newline'},index=[0])
56
  for group in df_prev.groupby('lineid'):
 
62
 
63
  def __call__(self):
64
  json_letter = self.load_data()
65
+ df_token = self._get_tokens(jsondata=json_letter)
66
+ df_line = self._get_line_child_ids(jsondata=json_letter)
67
+ df_nertags = self._get_ner_tags(jsondata=json_letter)
68
 
69
  df1 = pd.merge(df_token, df_nertags, on='blockid', how='left')
70
  df1['ner_tag'][df1['ner_tag'].isna()] = 'O'
71
  df2 = pd.merge(df1, df_line, left_on='blockid', right_on='childid', how='left').drop(columns=['childid'])
72
  df2['linewordrank'] = np.arange(df2.shape[0])
73
 
74
+ df3 = self._insert_newline_char(df_prev=df2)
75
  return {"tokens": df3['token'].tolist(),
76
  "ner_tags": df3['ner_tag'].tolist(),
77
  "filename": self.letterfilepath.name