Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,16 +16,10 @@ SAMPLE_RATE = 16000 # Hz
|
|
16 |
|
17 |
# load ASR model
|
18 |
canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
|
19 |
-
|
20 |
-
# update decode params
|
21 |
decode_cfg = canary_model.cfg.decoding
|
22 |
decode_cfg.beam.beam_size = 1
|
23 |
canary_model.change_decoding_strategy(decode_cfg)
|
24 |
|
25 |
-
# load TTS model
|
26 |
-
# tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
|
27 |
-
# tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
|
28 |
-
|
29 |
# Function to convert audio to text using ASR
|
30 |
def gen_text(audio_filepath, action, source_lang, target_lang):
|
31 |
if audio_filepath is None:
|
@@ -72,7 +66,6 @@ def gen_text(audio_filepath, action, source_lang, target_lang):
|
|
72 |
# Function to convert text to speech using TTS
|
73 |
def gen_speech(text, lang):
|
74 |
set_seed(555) # Make it deterministic
|
75 |
-
|
76 |
match lang:
|
77 |
case "en":
|
78 |
model = "facebook/mms-tts-eng"
|
@@ -85,11 +78,6 @@ def gen_speech(text, lang):
|
|
85 |
case _:
|
86 |
model = "facebook/mms-tts-eng"
|
87 |
|
88 |
-
# if lang=="en":
|
89 |
-
# model = "facebook/mms-tts-eng"
|
90 |
-
# elif lang=="fr":
|
91 |
-
# model = "facebook/mms-tts-fra"
|
92 |
-
|
93 |
# load TTS model
|
94 |
tts_model = VitsModel.from_pretrained(model)
|
95 |
tts_tokenizer = AutoTokenizer.from_pretrained(model)
|
@@ -146,13 +134,18 @@ with playground:
|
|
146 |
with gr.Column():
|
147 |
clear_button = gr.ClearButton(components=[input_audio, source_lang, target_lang, transcipted_text, translated_text, translated_speech], value="Clear")
|
148 |
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
submit_button.click(start_process, inputs=[input_audio, source_lang, target_lang], outputs=[transcipted_text, translated_text, translated_speech])
|
158 |
|
|
|
16 |
|
17 |
# load ASR model
|
18 |
canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
|
|
|
|
|
19 |
decode_cfg = canary_model.cfg.decoding
|
20 |
decode_cfg.beam.beam_size = 1
|
21 |
canary_model.change_decoding_strategy(decode_cfg)
|
22 |
|
|
|
|
|
|
|
|
|
23 |
# Function to convert audio to text using ASR
|
24 |
def gen_text(audio_filepath, action, source_lang, target_lang):
|
25 |
if audio_filepath is None:
|
|
|
66 |
# Function to convert text to speech using TTS
|
67 |
def gen_speech(text, lang):
|
68 |
set_seed(555) # Make it deterministic
|
|
|
69 |
match lang:
|
70 |
case "en":
|
71 |
model = "facebook/mms-tts-eng"
|
|
|
78 |
case _:
|
79 |
model = "facebook/mms-tts-eng"
|
80 |
|
|
|
|
|
|
|
|
|
|
|
81 |
# load TTS model
|
82 |
tts_model = VitsModel.from_pretrained(model)
|
83 |
tts_tokenizer = AutoTokenizer.from_pretrained(model)
|
|
|
134 |
with gr.Column():
|
135 |
clear_button = gr.ClearButton(components=[input_audio, source_lang, target_lang, transcipted_text, translated_text, translated_speech], value="Clear")
|
136 |
|
137 |
+
with gr.Row():
|
138 |
+
gr.Examples(
|
139 |
+
examples=[
|
140 |
+
["sample_en.wav","en","fr"],
|
141 |
+
["sample_fr.wav","fr","de"],
|
142 |
+
["sample_de.wav","de","es"],
|
143 |
+
["sample_es.wav","es","en"]
|
144 |
+
],
|
145 |
+
inputs=[input_audio, source_lang, target_lang],
|
146 |
+
outputs=[transcipted_text, translated_text, translated_speech],
|
147 |
+
run_on_click=True, cache_examples=True, fn=start_process
|
148 |
+
)
|
149 |
|
150 |
submit_button.click(start_process, inputs=[input_audio, source_lang, target_lang], outputs=[transcipted_text, translated_text, translated_speech])
|
151 |
|