Zeimoto committed on
Commit
1f8b546
·
1 Parent(s): e9355ad

added max_new_tokens to translation

Browse files
Files changed (2) hide show
  1. app.py +7 -7
  2. translation.py +2 -1
app.py CHANGED
@@ -12,26 +12,26 @@ def main ():
12
  print(f"Running main")
13
 
14
  #print(subprocess.Popen('pip freeze > requirements_hug.txt', shell=True))
15
- # text = "Tenho uma proposta para a Caixa Geral de Depositos, para 3 consultores outsystems, 300 euros por dia e um periodo de seis meses."
16
- # st.write(text)
17
  # traducao = get_translation(text_to_translate=text, languageCode="pt")
18
  # st.write(traducao)
19
- s2t = init_model_trans()
20
- # ner = init_model_ner() #async
21
 
 
 
22
  print("Rendering UI...")
23
  start_render = set_start()
24
  wav_audio_data = st_audiorec()
25
  audit_elapsedtime(function="Rendering UI", start=start_render)
26
 
27
- if wav_audio_data is not None and s2t is not None:
 
 
28
  print("Loading data...")
29
  start_loading = set_start()
30
  st.audio(wav_audio_data, format='audio/wav')
31
  original = transcribe(wav_audio_data, s2t)
32
  st.write(f"Original: {original}")
33
- print("next is translation")
34
- print("translating audio...")
35
  translation = translate(original)
36
  st.write(f"Transcription: {translation}")
37
 
 
12
  print(f"Running main")
13
 
14
  #print(subprocess.Popen('pip freeze > requirements_hug.txt', shell=True))
15
+ original = "Tenho uma proposta para a Caixa Geral de Depositos, para 3 consultores Outsystems, 300 euros por dia e um periodo de seis meses."
16
+ st.write(f"Original: {original}")
17
  # traducao = get_translation(text_to_translate=text, languageCode="pt")
18
  # st.write(traducao)
 
 
19
 
20
+ translation = translate(original)
21
+ st.write(f"Translation: {translation}")
22
  print("Rendering UI...")
23
  start_render = set_start()
24
  wav_audio_data = st_audiorec()
25
  audit_elapsedtime(function="Rendering UI", start=start_render)
26
 
27
+ if wav_audio_data is not None:
28
+ s2t = init_model_trans()
29
+ # ner = init_model_ner()
30
  print("Loading data...")
31
  start_loading = set_start()
32
  st.audio(wav_audio_data, format='audio/wav')
33
  original = transcribe(wav_audio_data, s2t)
34
  st.write(f"Original: {original}")
 
 
35
  translation = translate(original)
36
  st.write(f"Transcription: {translation}")
37
 
translation.py CHANGED
@@ -13,9 +13,10 @@ def translate(text_to_translate: str) -> str:
13
 
14
  start = set_start()
15
  print("Initiating translation model...")
 
16
  tokenizer = AutoTokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
17
  model = AutoModelForSeq2SeqLM.from_pretrained("unicamp-dl/translation-pt-en-t5")
18
- pten_pipeline = pipeline('text2text-generation', model=model, tokenizer=tokenizer)
19
  translated_text = pten_pipeline(text_to_translate)
20
 
21
  audit_elapsedtime(function="Finished translation", start=start)
 
13
 
14
  start = set_start()
15
  print("Initiating translation model...")
16
+ text_size = len(text_to_translate)*2
17
  tokenizer = AutoTokenizer.from_pretrained("unicamp-dl/translation-pt-en-t5")
18
  model = AutoModelForSeq2SeqLM.from_pretrained("unicamp-dl/translation-pt-en-t5")
19
+ pten_pipeline = pipeline('text2text-generation', kwargs={"max_new_tokens": text_size},model=model, tokenizer=tokenizer)
20
  translated_text = pten_pipeline(text_to_translate)
21
 
22
  audit_elapsedtime(function="Finished translation", start=start)