Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, Wav2Vec2ProcessorWithLM, Wav2Vec2ForCTC
|
|
|
3 |
import os
|
4 |
import soundfile as sf
|
5 |
from pyannote.audio import Pipeline
|
@@ -9,7 +10,7 @@ from pydub.playback import play
|
|
9 |
from datetime import datetime, timedelta
|
10 |
import time
|
11 |
from silero_vad import load_silero_vad, read_audio, get_speech_timestamps
|
12 |
-
from googletrans import Translator
|
13 |
|
14 |
|
15 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
@@ -26,8 +27,17 @@ model = load_silero_vad()
|
|
26 |
pipeline_dia = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
|
27 |
use_auth_token=HF_TOKEN)
|
28 |
|
|
|
29 |
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
temp_path = "temp.wav"
|
32 |
|
33 |
def preprocess(audio_path):
|
@@ -66,7 +76,13 @@ def fast_transcribe(diarise, how_diarise, translate, audio):
|
|
66 |
lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{label}] {trans}\n")
|
67 |
else:
|
68 |
print("TRANSLATION STARTED")
|
69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
print(f"TRANSLATION ENDED RESULT {trans_eng}")
|
71 |
lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{label}] {trans}\n[{label}] {trans_eng}\n")
|
72 |
print("RECOGNISING ENDED")
|
@@ -93,7 +109,13 @@ def fast_transcribe(diarise, how_diarise, translate, audio):
|
|
93 |
lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{trans}\n")
|
94 |
else:
|
95 |
print("TRANSLATION STARTED")
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
print(f"TRANSLATION ENDED RESULT {trans_eng}")
|
98 |
lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n{trans}\n{trans_eng}\n")
|
99 |
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline, Wav2Vec2ProcessorWithLM, Wav2Vec2ForCTC
|
3 |
+
from transformers import T5ForConditionalGeneration, T5Tokenizer
|
4 |
import os
|
5 |
import soundfile as sf
|
6 |
from pyannote.audio import Pipeline
|
|
|
10 |
from datetime import datetime, timedelta
|
11 |
import time
|
12 |
from silero_vad import load_silero_vad, read_audio, get_speech_timestamps
|
13 |
+
#from googletrans import Translator
|
14 |
|
15 |
|
16 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
|
27 |
pipeline_dia = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
|
28 |
use_auth_token=HF_TOKEN)
|
29 |
|
30 |
+
device = 'cpu'  # set to 'cuda' to run the translation model on a GPU
|
31 |
|
32 |
+
model_name_tr = 'utrobinmv/t5_translate_en_ru_zh_small_1024'
|
33 |
+
model_tr = T5ForConditionalGeneration.from_pretrained(model_name_tr)
|
34 |
+
model_tr.to(device)
|
35 |
+
tokenizer_tr = T5Tokenizer.from_pretrained(model_name_tr)
|
36 |
+
|
37 |
+
prefix = 'translate to en: '
|
38 |
+
|
39 |
+
|
40 |
+
# translator = Translator()
|
41 |
temp_path = "temp.wav"
|
42 |
|
43 |
def preprocess(audio_path):
|
|
|
76 |
lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{label}] {trans}\n")
|
77 |
else:
|
78 |
print("TRANSLATION STARTED")
|
79 |
+
src_text = prefix + trans
|
80 |
+
# Translate the recognised Russian text to English
|
81 |
+
input_ids = tokenizer_tr(src_text, return_tensors="pt")
|
82 |
+
generated_tokens = model_tr.generate(**input_ids.to(device))
|
83 |
+
|
84 |
+
# batch_decode returns a list with one string per generated sequence; a single
# sentence was encoded, so take element 0 to get a plain string rather than
# formatting the list repr (e.g. "['Hello']") into the subtitle line.
trans_eng = tokenizer_tr.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
85 |
+
#trans_eng = translator.translate(trans, src='ru', dest="en").text
|
86 |
print(f"TRANSLATION ENDED RESULT {trans_eng}")
|
87 |
lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{label}] {trans}\n[{label}] {trans_eng}\n")
|
88 |
print("RECOGNISING ENDED")
|
|
|
109 |
lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{trans}\n")
|
110 |
else:
|
111 |
print("TRANSLATION STARTED")
|
112 |
+
src_text = prefix + trans
|
113 |
+
# Translate the recognised Russian text to English
|
114 |
+
input_ids = tokenizer_tr(src_text, return_tensors="pt")
|
115 |
+
generated_tokens = model_tr.generate(**input_ids.to(device))
|
116 |
+
|
117 |
+
# batch_decode returns a list with one string per generated sequence; a single
# sentence was encoded, so take element 0 to get a plain string rather than
# formatting the list repr (e.g. "['Hello']") into the subtitle line.
trans_eng = tokenizer_tr.batch_decode(generated_tokens, skip_special_tokens=True)[0]
|
118 |
+
#trans_eng = translator.translate(trans, src='ru', dest="en").text
|
119 |
print(f"TRANSLATION ENDED RESULT {trans_eng}")
|
120 |
lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n{trans}\n{trans_eng}\n")
|
121 |
|