Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -25,6 +25,8 @@ canary_model.change_decoding_strategy(decode_cfg)
|
|
25 |
# load TTS model
|
26 |
tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
|
27 |
tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
|
|
|
|
|
28 |
|
29 |
# Function to convert audio to text using ASR
|
30 |
def gen_text(audio_filepath, action):
|
@@ -71,9 +73,9 @@ def gen_text(audio_filepath, action):
|
|
71 |
# Function to convert text to speech using TTS
|
72 |
def gen_speech(text):
|
73 |
set_seed(555) # Make it deterministic
|
74 |
-
input_text =
|
75 |
with torch.no_grad():
|
76 |
-
outputs =
|
77 |
waveform_np = outputs.waveform[0].cpu().numpy()
|
78 |
output_file = f"{str(uuid.uuid4())}.wav"
|
79 |
wav.write(output_file, rate=tts_model.config.sampling_rate, data=waveform_np)
|
@@ -112,13 +114,13 @@ with playground:
|
|
112 |
with gr.Column():
|
113 |
clear_button = gr.ClearButton(components=[input_audio, transcipted_text, translated_speech, translated_text], value="Clear")
|
114 |
|
115 |
-
with gr.Row():
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
|
123 |
submit_button.click(start_process, inputs=[input_audio], outputs=[transcipted_text, translated_text, translated_speech])
|
124 |
|
|
|
25 |
# load TTS model
|
26 |
tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
|
27 |
tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
|
28 |
+
tts_fra_model = VitsModel.from_pretrained("facebook/mms-tts-fra")
|
29 |
+
tts_fra_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-fra")
|
30 |
|
31 |
# Function to convert audio to text using ASR
|
32 |
def gen_text(audio_filepath, action):
|
|
|
73 |
# Function to convert text to speech using TTS
|
74 |
def gen_speech(text):
|
75 |
set_seed(555) # Make it deterministic
|
76 |
+
input_text = tts_fra_tokenizer(text, return_tensors="pt")
|
77 |
with torch.no_grad():
|
78 |
+
outputs = tts_fra_model(**input_text)
|
79 |
waveform_np = outputs.waveform[0].cpu().numpy()
|
80 |
output_file = f"{str(uuid.uuid4())}.wav"
|
81 |
wav.write(output_file, rate=tts_model.config.sampling_rate, data=waveform_np)
|
|
|
114 |
with gr.Column():
|
115 |
clear_button = gr.ClearButton(components=[input_audio, transcipted_text, translated_speech, translated_text], value="Clear")
|
116 |
|
117 |
+
# with gr.Row():
|
118 |
+
# gr.Examples(
|
119 |
+
# examples=["sample.wav"],
|
120 |
+
# inputs=[input_audio],
|
121 |
+
# outputs=[transcipted_text, translated_speech, translated_text],
|
122 |
+
# run_on_click=True, cache_examples=True, fn=start_process
|
123 |
+
# )
|
124 |
|
125 |
submit_button.click(start_process, inputs=[input_audio], outputs=[transcipted_text, translated_text, translated_speech])
|
126 |
|