jcg00v committed on
Commit
c627b59
1 Parent(s): 389d4d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -10
app.py CHANGED
@@ -4,6 +4,7 @@ from transformers import pipeline, AutoModelForTokenClassification, AutoTokenize
4
 
5
  def get_result_text_es_pt (list_entity, text, lang):
6
  result_words = []
 
7
  if lang == "es":
8
  punc_tags = ['¿', '?', '¡', '!', ',', '.', ':']
9
  else:
@@ -22,11 +23,17 @@ def get_result_text_es_pt (list_entity, text, lang):
22
  # check subwords
23
  if word[0] == "#":
24
  subword = True
25
- p_s = list_entity[idx-1]["start"]
26
- p_e = list_entity[idx-1]["end"]
27
- # word = result_words[-1].replace(punc_in, "") + text[start:end]
28
- word = text[p_s:p_e] + text[start:end]
29
-
 
 
 
 
 
 
30
  if tag == "l":
31
  word = word
32
  elif tag == "u":
@@ -59,6 +66,7 @@ def get_result_text_ca (list_entity, text):
59
  end = entity["end"]
60
  tag = entity["entity"]
61
  word = entity["word"]
 
62
 
63
  # check punctuation
64
  punc_in = next((p for p in punc_tags if p in tag), "")
@@ -67,11 +75,15 @@ def get_result_text_ca (list_entity, text):
67
  # check subwords
68
  if word[0] != "Ġ":
69
  subword = True
70
- p_s = list_entity[idx-1]["start"]
71
- p_e = list_entity[idx-1]["end"]
72
- # word = result_words[-1].replace(punc_in, "") + text[start:end]
73
- word = text[p_s:p_e] + text[start:end]
74
- else:
 
 
 
 
75
  word = text[start:end]
76
 
77
  if tag == "l":
 
4
 
5
  def get_result_text_es_pt (list_entity, text, lang):
6
  result_words = []
7
+ tmp_word = ""
8
  if lang == "es":
9
  punc_tags = ['¿', '?', '¡', '!', ',', '.', ':']
10
  else:
 
23
  # check subwords
24
  if word[0] == "#":
25
  subword = True
26
+ if tmp_word == "":
27
+ p_s = list_entity[idx-1]["start"]
28
+ p_e = list_entity[idx-1]["end"]
29
+ tmp_word = text[p_s:p_e] + text[start:end]
30
+ else:
31
+ tmp_word = tmp_word + text[start:end]
32
+ word = tmp_word
33
+ else:
34
+ tmp_word = ""
35
+ word = text[start:end]
36
+
37
  if tag == "l":
38
  word = word
39
  elif tag == "u":
 
66
  end = entity["end"]
67
  tag = entity["entity"]
68
  word = entity["word"]
69
+ tmp_word = ""
70
 
71
  # check punctuation
72
  punc_in = next((p for p in punc_tags if p in tag), "")
 
75
  # check subwords
76
  if word[0] != "Ġ":
77
  subword = True
78
+ if tmp_word == "":
79
+ p_s = list_entity[idx-1]["start"]
80
+ p_e = list_entity[idx-1]["end"]
81
+ tmp_word = text[p_s:p_e] + text[start:end]
82
+ else:
83
+ tmp_word = tmp_word + text[start:end]
84
+ word = tmp_word
85
+ else:
86
+ tmp_word = ""
87
  word = text[start:end]
88
 
89
  if tag == "l":