Spaces:
Runtime error
Runtime error
Zeimoto
committed on
Commit
•
c438acc
1
Parent(s):
0536b78
added translation task in whisper model
Browse files
- app.py +19 -18
- speech2text.py +19 -1
app.py
CHANGED
@@ -2,8 +2,8 @@ import streamlit as st
|
|
2 |
from st_audiorec import st_audiorec
|
3 |
|
4 |
from nameder import init_model_ner, get_entity_labels
|
5 |
-
from speech2text import init_model_trans, transcribe
|
6 |
-
from translation import get_translation
|
7 |
from resources import audit_elapsedtime, set_start
|
8 |
import subprocess
|
9 |
|
@@ -12,25 +12,26 @@ def main ():
|
|
12 |
print(f"Running main")
|
13 |
|
14 |
#print(subprocess.Popen('pip freeze > requirements_hug.txt', shell=True))
|
15 |
-
text = "Tenho uma proposta para a Caixa Geral de Depositos, para 3 consultores outsystems, 300 euros por dia e um periodo de seis meses."
|
16 |
-
st.write(text)
|
17 |
-
traducao = get_translation(text_to_translate=text, languageCode="pt")
|
18 |
-
st.write(traducao)
|
19 |
-
|
20 |
# ner = init_model_ner() #async
|
21 |
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
34 |
|
35 |
# if text is not None and ner is not None:
|
36 |
# st.write('Entities: ', get_entity_labels(model=ner, text=text))
|
|
|
2 |
from st_audiorec import st_audiorec
|
3 |
|
4 |
from nameder import init_model_ner, get_entity_labels
|
5 |
+
from speech2text import init_model_trans, transcribe, translate
|
6 |
+
# from translation import get_translation
|
7 |
from resources import audit_elapsedtime, set_start
|
8 |
import subprocess
|
9 |
|
|
|
12 |
print(f"Running main")
|
13 |
|
14 |
#print(subprocess.Popen('pip freeze > requirements_hug.txt', shell=True))
|
15 |
+
# text = "Tenho uma proposta para a Caixa Geral de Depositos, para 3 consultores outsystems, 300 euros por dia e um periodo de seis meses."
|
16 |
+
# st.write(text)
|
17 |
+
# traducao = get_translation(text_to_translate=text, languageCode="pt")
|
18 |
+
# st.write(traducao)
|
19 |
+
s2t = init_model_trans()
|
20 |
# ner = init_model_ner() #async
|
21 |
|
22 |
+
print("Rendering UI...")
|
23 |
+
start_render = set_start()
|
24 |
+
wav_audio_data = st_audiorec()
|
25 |
+
audit_elapsedtime(function="Rendering UI", start=start_render)
|
26 |
|
27 |
+
if wav_audio_data is not None and s2t is not None:
|
28 |
+
print("Loading data...")
|
29 |
+
start_loading = set_start()
|
30 |
+
st.audio(wav_audio_data, format='audio/wav')
|
31 |
+
original = transcribe(wav_audio_data, s2t)
|
32 |
+
print("translating audio...")
|
33 |
+
translation = translate(original)
|
34 |
+
st.write(f"Original: {original}/nTranscription: {translation}")
|
35 |
|
36 |
# if text is not None and ner is not None:
|
37 |
# st.write('Entities: ', get_entity_labels(model=ner, text=text))
|
speech2text.py
CHANGED
@@ -14,7 +14,7 @@ def init_model_trans ():
|
|
14 |
model_id = "openai/whisper-large-v3"
|
15 |
|
16 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
17 |
-
model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=
|
18 |
)
|
19 |
model.to(device)
|
20 |
|
@@ -41,9 +41,27 @@ def transcribe (audio_sample: bytes, pipe) -> str:
|
|
41 |
start = set_start()
|
42 |
# dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
|
43 |
# sample = dataset[0]["audio"]
|
|
|
|
|
44 |
result = pipe(audio_sample)
|
|
|
45 |
audit_elapsedtime(function="Transcription", start=start)
|
46 |
print("transcription result",result)
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
#st.write('trancription: ', result["text"])
|
49 |
return result["text"]
|
|
|
14 |
model_id = "openai/whisper-large-v3"
|
15 |
|
16 |
model = AutoModelForSpeechSeq2Seq.from_pretrained(
|
17 |
+
model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
|
18 |
)
|
19 |
model.to(device)
|
20 |
|
|
|
41 |
start = set_start()
|
42 |
# dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
|
43 |
# sample = dataset[0]["audio"]
|
44 |
+
|
45 |
+
#result = pipe(audio_sample)
|
46 |
result = pipe(audio_sample)
|
47 |
+
|
48 |
audit_elapsedtime(function="Transcription", start=start)
|
49 |
print("transcription result",result)
|
50 |
|
51 |
+
#st.write('trancription: ', result["text"])
|
52 |
+
return result["text"]
|
53 |
+
|
54 |
+
def translate (audio_sample: bytes, pipe) -> str:
|
55 |
+
print("Initiating Translation...")
|
56 |
+
start = set_start()
|
57 |
+
# dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
|
58 |
+
# sample = dataset[0]["audio"]
|
59 |
+
|
60 |
+
#result = pipe(audio_sample)
|
61 |
+
result = pipe(audio_sample, generate_kwargs={"task": "translate"})
|
62 |
+
|
63 |
+
audit_elapsedtime(function="Translation", start=start)
|
64 |
+
print("Translation result",result)
|
65 |
+
|
66 |
#st.write('trancription: ', result["text"])
|
67 |
return result["text"]
|