emanuelaboros committed on
Commit
2447e1e
·
verified ·
1 Parent(s): 0fa92dc

Update generic_ner.py

Browse files
Files changed (1) hide show
  1. generic_ner.py +9 -4
generic_ner.py CHANGED
@@ -9,6 +9,9 @@ import torch.nn.functional as F
9
  import re
10
 
11
 
 
 
 
12
  def tokenize(text):
13
  # print(text)
14
  for punctuation in string.punctuation:
@@ -109,14 +112,18 @@ def get_entities(tokens, tags, confidences, text):
109
  "score": np.average(confidences[idx : idx + len(subtree)]),
110
  "index": (idx, idx + len(subtree)),
111
  "word": original_string,
112
- "start": entity_start_position,
113
  "end": entity_end_position,
114
- "text": text,
115
  }
116
  )
117
  # assert (
118
  # text[entity_start_position:entity_end_position] == original_string
119
  # )
 
 
 
 
 
120
  idx += len(subtree)
121
 
122
  # Update the current character position
@@ -148,8 +155,6 @@ def realign(
148
  return words_list, preds_list, confidence_list
149
 
150
 
151
- import re, string
152
-
153
  # List of additional "strange" punctuation marks
154
  additional_punctuation = "β€˜β€™β€œβ€β€žΒ«Β»β€’β€“β€”β€•β€£β—¦β€¦Β§ΒΆβ€ β€‘β€°β€²β€³γ€ˆγ€‰"
155
 
 
9
  import re
10
 
11
 
12
+ import re, string
13
+
14
+
15
  def tokenize(text):
16
  # print(text)
17
  for punctuation in string.punctuation:
 
112
  "score": np.average(confidences[idx : idx + len(subtree)]),
113
  "index": (idx, idx + len(subtree)),
114
  "word": original_string,
115
+ "start": entity_start_position-1,
116
  "end": entity_end_position,
 
117
  }
118
  )
119
  # assert (
120
  # text[entity_start_position:entity_end_position] == original_string
121
  # )
122
+ print(
123
+ text[entity_start_position:entity_end_position],
124
+ "------",
125
+ original_string,
126
+ )
127
  idx += len(subtree)
128
 
129
  # Update the current character position
 
155
  return words_list, preds_list, confidence_list
156
 
157
 
 
 
158
  # List of additional "strange" punctuation marks
159
  additional_punctuation = "β€˜β€™β€œβ€β€žΒ«Β»β€’β€“β€”β€•β€£β—¦β€¦Β§ΒΆβ€ β€‘β€°β€²β€³γ€ˆγ€‰"
160