Mihaj committed on
Commit
6a5a2f9
·
verified ·
1 Parent(s): 702ca95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -13
app.py CHANGED
@@ -36,12 +36,11 @@ def preprocess(audio_path):
36
  print("PREPROCESSING ENDED")
37
  return temp_path
38
 
39
- def transcribe(diarise, how_diarise, audio):
40
  audio = preprocess(audio)
41
  y, sr = sf.read(audio)
42
- print(diarise)
43
  if diarise:
44
- if how_diarise=="SlowButHighQuality":
45
  print("DIARISING")
46
  dia = pipeline_dia(audio)
47
  print("DIARISING ENDED")
@@ -60,7 +59,13 @@ def transcribe(diarise, how_diarise, audio):
60
  label = res[2]
61
  print(f"RECOGNISING LINE_{i} T_START {start_time_srt} T_END {end_time_srt} SPEAKER_{label}")
62
  trans = pipe(y[start:end], chunk_length_s=10, stride_length_s=(4, 2))["text"]
63
- lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{label}] {trans}\n")
 
 
 
 
 
 
64
  print("RECOGNISING ENDED")
65
  print(f"LINE RESULT {trans}")
66
  else:
@@ -82,8 +87,15 @@ def transcribe(diarise, how_diarise, audio):
82
  end_time_srt = f"{end_time_prts[0]}:{end_time_prts[1]}:{float(end_time_prts[2]):.3f}".replace('.', ',')
83
  print(f"RECOGNISING LINE_{i} T_START {start_time_srt} T_END {end_time_srt}")
84
  trans = pipe(y[start:end], chunk_length_s=10, stride_length_s=(4, 2))["text"]
85
- lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n{trans}\n")
86
  print("RECOGNISING ENDED")
 
 
 
 
 
 
 
 
87
  print(f"LINE RESULT {trans}")
88
  text = "\n".join(lines)
89
  else:
@@ -93,12 +105,27 @@ def transcribe(diarise, how_diarise, audio):
93
  text = res["text"]
94
  return text
95
 
96
- iface = gr.Interface(
97
- fn=transcribe,
98
- inputs=[gr.Checkbox(label="Diarise", info="Do you want subtitles?"), gr.Radio(["FastButLowQuality", "SlowButHighQuality", "-"], label="Diarise_Variant", info="You can choose separating on smaller pieces by faster yet low quality variant (Silero VAD), or slower yet high quality variant (Pyannote.Diarization, this option will detect different speakers)"), gr.Audio(type="filepath")],
99
- outputs="text",
100
- title="Wav2Vec2 RuOH",
101
- description=r"Realtime demo for Russian Oral History recognition using several diarizations method (Silero VAD, Pyannote) and a Wav2Vec large model from bond005. https://huggingface.co/bond005/wav2vec2-large-ru-golos-with-lm",
102
- )
103
 
104
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  print("PREPROCESSING ENDED")
37
  return temp_path
38
 
39
+ def fast_transcribe(diarise, how_diarise, translate, audio):
40
  audio = preprocess(audio)
41
  y, sr = sf.read(audio)
 
42
  if diarise:
43
+ if how_diarise=="Accurate":
44
  print("DIARISING")
45
  dia = pipeline_dia(audio)
46
  print("DIARISING ENDED")
 
59
  label = res[2]
60
  print(f"RECOGNISING LINE_{i} T_START {start_time_srt} T_END {end_time_srt} SPEAKER_{label}")
61
  trans = pipe(y[start:end], chunk_length_s=10, stride_length_s=(4, 2))["text"]
62
+ if not translate:
63
+ lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{label}] {trans}\n")
64
+ else:
65
+ print("TRANSLATION STARTED")
66
+ trans_eng = translator.translate('trans', src='ru', dest="en").text
67
+ print(f"TRANSLATION ENDED RESULT {trans_eng}")
68
+ lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{label}] {trans}\n[{label}] {trans_eng}\n")
69
  print("RECOGNISING ENDED")
70
  print(f"LINE RESULT {trans}")
71
  else:
 
87
  end_time_srt = f"{end_time_prts[0]}:{end_time_prts[1]}:{float(end_time_prts[2]):.3f}".replace('.', ',')
88
  print(f"RECOGNISING LINE_{i} T_START {start_time_srt} T_END {end_time_srt}")
89
  trans = pipe(y[start:end], chunk_length_s=10, stride_length_s=(4, 2))["text"]
 
90
  print("RECOGNISING ENDED")
91
+ if not translate:
92
+ lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n[{trans}\n")
93
+ else:
94
+ print("TRANSLATION STARTED")
95
+ trans_eng = translator.translate(trans, src='ru', dest="en").text
96
+ print(f"TRANSLATION ENDED RESULT {trans_eng}")
97
+ lines.append(f"{i+1}\n{start_time_srt} --> {end_time_srt}\n{trans}\n{trans_eng}\n")
98
+
99
  print(f"LINE RESULT {trans}")
100
  text = "\n".join(lines)
101
  else:
 
105
  text = res["text"]
106
  return text
107
 
108
+ with gr.Blocks() as demo:
109
+ gr.Markdown("""
110
+ # Wav2Vec2 RuOH
111
+ Realtime demo for Russian Oral History recognition using several diarization methods (Silero VAD, Pyannote) and a Wav2Vec large model from bond005. https://huggingface.co/bond005/wav2vec2-large-ru-golos-with-lm
112
+ """)
113
+ with gr.Tab("Fast Translation"):
 
114
 
115
+ with gr.Row():
116
+ with gr.Column():
117
+ fast_diarize_input = gr.Checkbox(label="Subtitles", info="Do you want subtitles?")
118
+ fast_diarize_radio_input = gr.Radio(["Fast", "Accurate", "-"], label="separating_on_subtitles_pption", info="You can choose separating audio on smaller pieces by faster yet low quality variant (Silero VAD), or slower yet high quality variant (Pyannote.Diarization, this option will detect different speakers)")
119
+ fast_translate_input = gr.Checkbox(label="Translate", info="Do you want translation to English?")
120
+ fast_audio_input = gr.Audio(type="filepath")
121
+
122
+ fast_output = gr.Textbox()
123
+
124
+ fast_inputs = [fast_diarize_input, fast_diarize_radio_input, fast_translate_input, fast_audio_input]
125
+ fast_recognize_button = gr.Button("Run")
126
+
127
+
128
+ fast_recognize_button.click(fast_transcribe, inputs=fast_inputs, outputs=fast_output)
129
+
130
+ if __name__ == "__main__":
131
+ demo.launch()