Update app.py
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ from transformers import pipeline, AutoModelForTokenClassification, AutoTokenize
|
|
4 |
|
5 |
def get_result_text_es_pt (list_entity, text, lang):
|
6 |
result_words = []
|
|
|
7 |
if lang == "es":
|
8 |
punc_tags = ['¿', '?', '¡', '!', ',', '.', ':']
|
9 |
else:
|
@@ -22,11 +23,17 @@ def get_result_text_es_pt (list_entity, text, lang):
|
|
22 |
# check subwords
|
23 |
if word[0] == "#":
|
24 |
subword = True
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
if tag == "l":
|
31 |
word = word
|
32 |
elif tag == "u":
|
@@ -59,6 +66,7 @@ def get_result_text_ca (list_entity, text):
|
|
59 |
end = entity["end"]
|
60 |
tag = entity["entity"]
|
61 |
word = entity["word"]
|
|
|
62 |
|
63 |
# check punctuation
|
64 |
punc_in = next((p for p in punc_tags if p in tag), "")
|
@@ -67,11 +75,15 @@ def get_result_text_ca (list_entity, text):
|
|
67 |
# check subwords
|
68 |
if word[0] != "Ġ":
|
69 |
subword = True
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
|
|
|
|
|
|
|
|
75 |
word = text[start:end]
|
76 |
|
77 |
if tag == "l":
|
|
|
4 |
|
5 |
def get_result_text_es_pt (list_entity, text, lang):
|
6 |
result_words = []
|
7 |
+
tmp_word = ""
|
8 |
if lang == "es":
|
9 |
punc_tags = ['¿', '?', '¡', '!', ',', '.', ':']
|
10 |
else:
|
|
|
23 |
# check subwords
|
24 |
if word[0] == "#":
|
25 |
subword = True
|
26 |
+
if tmp_word == "":
|
27 |
+
p_s = list_entity[idx-1]["start"]
|
28 |
+
p_e = list_entity[idx-1]["end"]
|
29 |
+
tmp_word = text[p_s:p_e] + text[start:end]
|
30 |
+
else:
|
31 |
+
tmp_word = tmp_word + text[start:end]
|
32 |
+
word = tmp_word
|
33 |
+
else:
|
34 |
+
tmp_word = ""
|
35 |
+
word = text[start:end]
|
36 |
+
|
37 |
if tag == "l":
|
38 |
word = word
|
39 |
elif tag == "u":
|
|
|
66 |
end = entity["end"]
|
67 |
tag = entity["entity"]
|
68 |
word = entity["word"]
|
69 |
+
tmp_word = ""
|
70 |
|
71 |
# check punctuation
|
72 |
punc_in = next((p for p in punc_tags if p in tag), "")
|
|
|
75 |
# check subwords
|
76 |
if word[0] != "Ġ":
|
77 |
subword = True
|
78 |
+
if tmp_word == "":
|
79 |
+
p_s = list_entity[idx-1]["start"]
|
80 |
+
p_e = list_entity[idx-1]["end"]
|
81 |
+
tmp_word = text[p_s:p_e] + text[start:end]
|
82 |
+
else:
|
83 |
+
tmp_word = tmp_word + text[start:end]
|
84 |
+
word = tmp_word
|
85 |
+
else:
|
86 |
+
tmp_word = ""
|
87 |
word = text[start:end]
|
88 |
|
89 |
if tag == "l":
|