Update generic_ner.py
Browse files- generic_ner.py +9 -4
generic_ner.py
CHANGED
@@ -9,6 +9,9 @@ import torch.nn.functional as F
|
|
9 |
import re
|
10 |
|
11 |
|
|
|
|
|
|
|
12 |
def tokenize(text):
|
13 |
# print(text)
|
14 |
for punctuation in string.punctuation:
|
@@ -109,14 +112,18 @@ def get_entities(tokens, tags, confidences, text):
|
|
109 |
"score": np.average(confidences[idx : idx + len(subtree)]),
|
110 |
"index": (idx, idx + len(subtree)),
|
111 |
"word": original_string,
|
112 |
-
"start": entity_start_position,
|
113 |
"end": entity_end_position,
|
114 |
-
"text": text,
|
115 |
}
|
116 |
)
|
117 |
# assert (
|
118 |
# text[entity_start_position:entity_end_position] == original_string
|
119 |
# )
|
|
|
|
|
|
|
|
|
|
|
120 |
idx += len(subtree)
|
121 |
|
122 |
# Update the current character position
|
@@ -148,8 +155,6 @@ def realign(
|
|
148 |
return words_list, preds_list, confidence_list
|
149 |
|
150 |
|
151 |
-
import re, string
|
152 |
-
|
153 |
# List of additional "strange" punctuation marks
|
154 |
additional_punctuation = "‘’“”„«»•–—―‣◦…§¶†‡‰′″〈〉"
|
155 |
|
|
|
9 |
import re
|
10 |
|
11 |
|
12 |
+
import re, string
|
13 |
+
|
14 |
+
|
15 |
def tokenize(text):
|
16 |
# print(text)
|
17 |
for punctuation in string.punctuation:
|
|
|
112 |
"score": np.average(confidences[idx : idx + len(subtree)]),
|
113 |
"index": (idx, idx + len(subtree)),
|
114 |
"word": original_string,
|
115 |
+
"start": entity_start_position-1,
|
116 |
"end": entity_end_position,
|
|
|
117 |
}
|
118 |
)
|
119 |
# assert (
|
120 |
# text[entity_start_position:entity_end_position] == original_string
|
121 |
# )
|
122 |
+
print(
|
123 |
+
text[entity_start_position:entity_end_position],
|
124 |
+
"------",
|
125 |
+
original_string,
|
126 |
+
)
|
127 |
idx += len(subtree)
|
128 |
|
129 |
# Update the current character position
|
|
|
155 |
return words_list, preds_list, confidence_list
|
156 |
|
157 |
|
|
|
|
|
158 |
# List of additional "strange" punctuation marks
|
159 |
additional_punctuation = "‘’“”„«»•–—―‣◦…§¶†‡‰′″〈〉"
|
160 |
|