paascorb committed on
Commit
3b2bf58
1 Parent(s): 2d452b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -17
app.py CHANGED
@@ -26,6 +26,21 @@ else:
26
  dev = "cpu"
27
  device = torch.device(dev)
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def request_pathname(files):
30
  if files is None:
31
  return [[]]
@@ -51,27 +66,13 @@ def traducir_parrafos(parrafos, tokenizer, model, tam_bloque=8, ):
51
  return parrafos_traducidos
52
 
53
  def traducir_es_en(texto):
54
- mname = "Helsinki-NLP/opus-mt-es-en"
55
- tokenizer = MarianTokenizer.from_pretrained(mname)
56
- model = MarianMTModel.from_pretrained(mname)
57
- model.to(device)
58
-
59
- lt = LineTokenizer()
60
- batch_size = 8
61
  parrafos = lt.tokenize(texto)
62
- par_tra = traducir_parrafos(parrafos, tokenizer, model)
63
  return "\n".join(par_tra)
64
 
65
  def traducir_en_es(texto):
66
- mname = "Helsinki-NLP/opus-mt-en-es"
67
- tokenizer = MarianTokenizer.from_pretrained(mname)
68
- model = MarianMTModel.from_pretrained(mname)
69
- model.to(device)
70
-
71
- lt = LineTokenizer()
72
- batch_size = 8
73
  parrafos = lt.tokenize(texto)
74
- par_tra = traducir_parrafos(parrafos, tokenizer, model)
75
  return "\n".join(par_tra)
76
 
77
  def validate_dataset(dataset):
@@ -91,7 +92,6 @@ def do_ask(question, button, dataset):
91
  path = row['filepath']
92
  text = Path(f'{path}').read_text()
93
  text_en = traducir_es_en(text)
94
- question_answerer = pipeline("question-answering", model='distilbert-base-cased-distilled-squad')
95
  QA_input = {
96
  'question': traducir_es_en(question),
97
  'context': text_en
 
26
  dev = "cpu"
27
  device = torch.device(dev)
28
 
29
+ # Definimos los modelos:
30
+ mname = "Helsinki-NLP/opus-mt-es-en"
31
+ tokenizer_es_en = MarianTokenizer.from_pretrained(mname)
32
+ model_es_en = MarianMTModel.from_pretrained(mname)
33
+ model_es_en.to(device)
34
+
35
+ mname = "Helsinki-NLP/opus-mt-en-es"
36
+ tokenizer_en_es = MarianTokenizer.from_pretrained(mname)
37
+ model_en_es = MarianMTModel.from_pretrained(mname)
38
+ model_en_es.to(device)
39
+
40
+ lt = LineTokenizer()
41
+
42
+ question_answerer = pipeline("question-answering", model='distilbert-base-cased-distilled-squad')
43
+
44
  def request_pathname(files):
45
  if files is None:
46
  return [[]]
 
66
  return parrafos_traducidos
67
 
68
  def traducir_es_en(texto):
 
 
 
 
 
 
 
69
  parrafos = lt.tokenize(texto)
70
+ par_tra = traducir_parrafos(parrafos, tokenizer_es_en, model_es_en)
71
  return "\n".join(par_tra)
72
 
73
  def traducir_en_es(texto):
 
 
 
 
 
 
 
74
  parrafos = lt.tokenize(texto)
75
+ par_tra = traducir_parrafos(parrafos, tokenizer_en_es, model_en_es)
76
  return "\n".join(par_tra)
77
 
78
  def validate_dataset(dataset):
 
92
  path = row['filepath']
93
  text = Path(f'{path}').read_text()
94
  text_en = traducir_es_en(text)
 
95
  QA_input = {
96
  'question': traducir_es_en(question),
97
  'context': text_en