Mihaj committed on
Commit
1d5a081
1 Parent(s): ed036ec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -4
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  from transformers import pipeline, Wav2Vec2ProcessorWithLM, Wav2Vec2ForCTC
 
3
  import os
4
  import soundfile as sf
5
  from pyannote.audio import Pipeline
@@ -9,7 +10,7 @@ from pydub.playback import play
9
  from datetime import datetime, timedelta
10
  import time
11
  from silero_vad import load_silero_vad, read_audio, get_speech_timestamps
12
- from googletrans import Translator
13
 
14
 
15
  HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -26,8 +27,17 @@ model = load_silero_vad()
26
  pipeline_dia = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
27
  use_auth_token=HF_TOKEN)
28
 
 
29
 
30
- translator = Translator()
 
 
 
 
 
 
 
 
31
  temp_path = "temp.wav"
32
 
33
  def preprocess(audio_path):
@@ -66,7 +76,13 @@ def fast_transcribe(diarise, how_diarise, translate, audio):
66
  lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{label}] {trans}\n")
67
  else:
68
  print("TRANSLATION STARTED")
69
- trans_eng = translator.translate('trans', src='ru', dest="en").text
 
 
 
 
 
 
70
  print(f"TRANSLATION ENDED RESULT {trans_eng}")
71
  lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{label}] {trans}\n[{label}] {trans_eng}\n")
72
  print("RECOGNISING ENDED")
@@ -93,7 +109,13 @@ def fast_transcribe(diarise, how_diarise, translate, audio):
93
  lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{trans}\n")
94
  else:
95
  print("TRANSLATION STARTED")
96
- trans_eng = translator.translate(trans, src='ru', dest="en").text
 
 
 
 
 
 
97
  print(f"TRANSLATION ENDED RESULT {trans_eng}")
98
  lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n{trans}\n{trans_eng}\n")
99
 
 
1
  import gradio as gr
2
  from transformers import pipeline, Wav2Vec2ProcessorWithLM, Wav2Vec2ForCTC
3
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
4
  import os
5
  import soundfile as sf
6
  from pyannote.audio import Pipeline
 
10
  from datetime import datetime, timedelta
11
  import time
12
  from silero_vad import load_silero_vad, read_audio, get_speech_timestamps
13
+ #from googletrans import Translator
14
 
15
 
16
  HF_TOKEN = os.environ.get("HF_TOKEN")
 
27
  pipeline_dia = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
28
  use_auth_token=HF_TOKEN)
29
 
30
+ device = 'cpu' # or 'cuda' to run translation on the GPU
31
 
32
+ model_name_tr = 'utrobinmv/t5_translate_en_ru_zh_small_1024'
33
+ model_tr = T5ForConditionalGeneration.from_pretrained(model_name_tr)
34
+ model_tr.to(device)
35
+ tokenizer_tr = T5Tokenizer.from_pretrained(model_name_tr)
36
+
37
+ prefix = 'translate to en: '
38
+
39
+
40
+ # translator = Translator()
41
  temp_path = "temp.wav"
42
 
43
  def preprocess(audio_path):
 
76
  lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{label}] {trans}\n")
77
  else:
78
  print("TRANSLATION STARTED")
79
+ src_text = prefix + trans
80
+ # translate Russian to English
81
+ input_ids = tokenizer_tr(src_text, return_tensors="pt")
82
+ generated_tokens = model_tr.generate(**input_ids.to(device))
83
+
84
+ trans_eng = tokenizer_tr.batch_decode(generated_tokens, skip_special_tokens=True)
85
+ #trans_eng = translator.translate(trans, src='ru', dest="en").text
86
  print(f"TRANSLATION ENDED RESULT {trans_eng}")
87
  lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{label}] {trans}\n[{label}] {trans_eng}\n")
88
  print("RECOGNISING ENDED")
 
109
  lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{trans}\n")
110
  else:
111
  print("TRANSLATION STARTED")
112
+ src_text = prefix + trans
113
+ # translate Russian to English
114
+ input_ids = tokenizer_tr(src_text, return_tensors="pt")
115
+ generated_tokens = model_tr.generate(**input_ids.to(device))
116
+
117
+ trans_eng = tokenizer_tr.batch_decode(generated_tokens, skip_special_tokens=True)
118
+ #trans_eng = translator.translate(trans, src='ru', dest="en").text
119
  print(f"TRANSLATION ENDED RESULT {trans_eng}")
120
  lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n{trans}\n{trans_eng}\n")
121