Spaces:

VOCALINLP
/

punctuation_and_capitalization_restoration_sanivert

Runtime error

jcg00v committed on Mar 2

Commit

c627b59

•

1 Parent(s): 389d4d9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ from transformers import pipeline, AutoModelForTokenClassification, AutoTokenize
 def get_result_text_es_pt (list_entity, text, lang):
     result_words = []
     if lang == "es":
         punc_tags = ['¿', '?', '¡', '!', ',', '.', ':']
     else:
@@ -22,11 +23,17 @@ def get_result_text_es_pt (list_entity, text, lang):
         # check subwords
         if word[0] == "#":
             subword = True
-            p_s = list_entity[idx-1]["start"]
-            p_e = list_entity[idx-1]["end"]
-            # word = result_words[-1].replace(punc_in, "") + text[start:end]
-            word = text[p_s:p_e] + text[start:end]
         if tag == "l":
             word = word
         elif tag == "u":
@@ -59,6 +66,7 @@ def get_result_text_ca (list_entity, text):
         end = entity["end"]
         tag = entity["entity"]
         word = entity["word"]
         # check punctuation
         punc_in = next((p for p in punc_tags if p in tag), "")
@@ -67,11 +75,15 @@ def get_result_text_ca (list_entity, text):
         # check subwords
         if word[0] != "Ġ":
             subword = True
-            p_s = list_entity[idx-1]["start"]
-            p_e = list_entity[idx-1]["end"]
-            # word = result_words[-1].replace(punc_in, "") + text[start:end]
-            word = text[p_s:p_e] + text[start:end]
-        else:
             word = text[start:end]
         if tag == "l":

 def get_result_text_es_pt (list_entity, text, lang):
     result_words = []
+    tmp_word = ""
     if lang == "es":
         punc_tags = ['¿', '?', '¡', '!', ',', '.', ':']
     else:
         # check subwords
         if word[0] == "#":
             subword = True
+            if tmp_word == "":
+                p_s = list_entity[idx-1]["start"]
+                p_e = list_entity[idx-1]["end"]
+                tmp_word = text[p_s:p_e] + text[start:end]
+            else:
+                tmp_word = tmp_word + text[start:end]
+            word = tmp_word
+        else:
+            tmp_word = ""
+            word = text[start:end]
         if tag == "l":
             word = word
         elif tag == "u":
         end = entity["end"]
         tag = entity["entity"]
         word = entity["word"]
+        tmp_word = ""
         # check punctuation
         punc_in = next((p for p in punc_tags if p in tag), "")
         # check subwords
         if word[0] != "Ġ":
             subword = True
+            if tmp_word == "":
+                p_s = list_entity[idx-1]["start"]
+                p_e = list_entity[idx-1]["end"]
+                tmp_word = text[p_s:p_e] + text[start:end]
+            else:
+                tmp_word = tmp_word + text[start:end]
+            word = tmp_word
+        else:
+            tmp_word = ""
             word = text[start:end]
         if tag == "l":