Zeimoto committed on
Commit
c438acc
1 Parent(s): 0536b78

Added translation task to the Whisper model

Browse files
Files changed (2) hide show
  1. app.py +19 -18
  2. speech2text.py +19 -1
app.py CHANGED
@@ -2,8 +2,8 @@ import streamlit as st
2
  from st_audiorec import st_audiorec
3
 
4
  from nameder import init_model_ner, get_entity_labels
5
- from speech2text import init_model_trans, transcribe
6
- from translation import get_translation
7
  from resources import audit_elapsedtime, set_start
8
  import subprocess
9
 
@@ -12,25 +12,26 @@ def main ():
12
  print(f"Running main")
13
 
14
  #print(subprocess.Popen('pip freeze > requirements_hug.txt', shell=True))
15
- text = "Tenho uma proposta para a Caixa Geral de Depositos, para 3 consultores outsystems, 300 euros por dia e um periodo de seis meses."
16
- st.write(text)
17
- traducao = get_translation(text_to_translate=text, languageCode="pt")
18
- st.write(traducao)
19
- # s2t = init_model_trans()
20
  # ner = init_model_ner() #async
21
 
22
- # print("Rendering UI...")
23
- # start_render = set_start()
24
- # wav_audio_data = st_audiorec()
25
- # audit_elapsedtime(function="Rendering UI", start=start_render)
26
 
27
- # if wav_audio_data is not None and s2t is not None:
28
- # print("Loading data...")
29
- # start_loading = set_start()
30
- # st.audio(wav_audio_data, format='audio/wav')
31
- # text = transcribe(wav_audio_data, s2t)
32
- # print("translating audio...")
33
- # translation = get_translation("pt")
 
34
 
35
  # if text is not None and ner is not None:
36
  # st.write('Entities: ', get_entity_labels(model=ner, text=text))
 
2
  from st_audiorec import st_audiorec
3
 
4
  from nameder import init_model_ner, get_entity_labels
5
+ from speech2text import init_model_trans, transcribe, translate
6
+ # from translation import get_translation
7
  from resources import audit_elapsedtime, set_start
8
  import subprocess
9
 
 
12
  print(f"Running main")
13
 
14
  #print(subprocess.Popen('pip freeze > requirements_hug.txt', shell=True))
15
+ # text = "Tenho uma proposta para a Caixa Geral de Depositos, para 3 consultores outsystems, 300 euros por dia e um periodo de seis meses."
16
+ # st.write(text)
17
+ # traducao = get_translation(text_to_translate=text, languageCode="pt")
18
+ # st.write(traducao)
19
+ s2t = init_model_trans()
20
  # ner = init_model_ner() #async
21
 
22
+ print("Rendering UI...")
23
+ start_render = set_start()
24
+ wav_audio_data = st_audiorec()
25
+ audit_elapsedtime(function="Rendering UI", start=start_render)
26
 
27
+ if wav_audio_data is not None and s2t is not None:
28
+ print("Loading data...")
29
+ start_loading = set_start()
30
+ st.audio(wav_audio_data, format='audio/wav')
31
+ original = transcribe(wav_audio_data, s2t)
32
+ print("translating audio...")
33
+ translation = translate(original)
34
+ st.write(f"Original: {original}/nTranscription: {translation}")
35
 
36
  # if text is not None and ner is not None:
37
  # st.write('Entities: ', get_entity_labels(model=ner, text=text))
speech2text.py CHANGED
@@ -14,7 +14,7 @@ def init_model_trans ():
14
  model_id = "openai/whisper-large-v3"
15
 
16
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
17
- model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=False, use_safetensors=True
18
  )
19
  model.to(device)
20
 
@@ -41,9 +41,27 @@ def transcribe (audio_sample: bytes, pipe) -> str:
41
  start = set_start()
42
  # dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
43
  # sample = dataset[0]["audio"]
 
 
44
  result = pipe(audio_sample)
 
45
  audit_elapsedtime(function="Transcription", start=start)
46
  print("transcription result",result)
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  #st.write('trancription: ', result["text"])
49
  return result["text"]
 
14
  model_id = "openai/whisper-large-v3"
15
 
16
  model = AutoModelForSpeechSeq2Seq.from_pretrained(
17
+ model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
18
  )
19
  model.to(device)
20
 
 
41
  start = set_start()
42
  # dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
43
  # sample = dataset[0]["audio"]
44
+
45
+ #result = pipe(audio_sample)
46
  result = pipe(audio_sample)
47
+
48
  audit_elapsedtime(function="Transcription", start=start)
49
  print("transcription result",result)
50
 
51
+ #st.write('trancription: ', result["text"])
52
+ return result["text"]
53
+
54
def translate (audio_sample: bytes, pipe) -> str:
    """Translate spoken audio to English text with a Whisper pipeline.

    Args:
        audio_sample: raw audio bytes (e.g. WAV data from the recorder).
        pipe: a transformers automatic-speech-recognition pipeline wrapping
            a Whisper model. Passing task="translate" via generate_kwargs
            makes Whisper output English regardless of the spoken language.

    Returns:
        The translated text extracted from the pipeline result dict.
    """
    print("Initiating Translation...")
    start = set_start()
    # task="translate" switches Whisper from transcription to translation.
    result = pipe(audio_sample, generate_kwargs={"task": "translate"})
    audit_elapsedtime(function="Translation", start=start)
    print("Translation result",result)
    return result["text"]