rayl-aoit committed on
Commit
6d345cb
·
verified ·
1 Parent(s): e5ddf72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -9
app.py CHANGED
@@ -25,6 +25,8 @@ canary_model.change_decoding_strategy(decode_cfg)
25
  # load TTS model
26
  tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
27
  tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
 
 
28
 
29
  # Function to convert audio to text using ASR
30
  def gen_text(audio_filepath, action):
@@ -71,9 +73,9 @@ def gen_text(audio_filepath, action):
71
  # Function to convert text to speech using TTS
72
  def gen_speech(text):
73
  set_seed(555) # Make it deterministic
74
- input_text = tts_tokenizer(text, return_tensors="pt")
75
  with torch.no_grad():
76
- outputs = tts_model(**input_text)
77
  waveform_np = outputs.waveform[0].cpu().numpy()
78
  output_file = f"{str(uuid.uuid4())}.wav"
79
  wav.write(output_file, rate=tts_model.config.sampling_rate, data=waveform_np)
@@ -112,13 +114,13 @@ with playground:
112
  with gr.Column():
113
  clear_button = gr.ClearButton(components=[input_audio, transcipted_text, translated_speech, translated_text], value="Clear")
114
 
115
- with gr.Row():
116
- gr.Examples(
117
- examples=["sample.wav"],
118
- inputs=[input_audio],
119
- outputs=[transcipted_text, translated_speech, translated_text],
120
- run_on_click=True, cache_examples=True, fn=start_process
121
- )
122
 
123
  submit_button.click(start_process, inputs=[input_audio], outputs=[transcipted_text, translated_text, translated_speech])
124
 
 
25
  # load TTS model
26
  tts_model = VitsModel.from_pretrained("facebook/mms-tts-eng")
27
  tts_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
28
+ tts_fra_model = VitsModel.from_pretrained("facebook/mms-tts-fra")
29
+ tts_fra_tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-fra")
30
 
31
  # Function to convert audio to text using ASR
32
  def gen_text(audio_filepath, action):
 
73
  # Function to convert text to speech using TTS
74
  def gen_speech(text):
75
  set_seed(555) # Make it deterministic
76
+ input_text = tts_fra_tokenizer(text, return_tensors="pt")
77
  with torch.no_grad():
78
+ outputs = tts_fra_model(**input_text)
79
  waveform_np = outputs.waveform[0].cpu().numpy()
80
  output_file = f"{str(uuid.uuid4())}.wav"
81
  wav.write(output_file, rate=tts_model.config.sampling_rate, data=waveform_np)
 
114
  with gr.Column():
115
  clear_button = gr.ClearButton(components=[input_audio, transcipted_text, translated_speech, translated_text], value="Clear")
116
 
117
+ # with gr.Row():
118
+ # gr.Examples(
119
+ # examples=["sample.wav"],
120
+ # inputs=[input_audio],
121
+ # outputs=[transcipted_text, translated_speech, translated_text],
122
+ # run_on_click=True, cache_examples=True, fn=start_process
123
+ # )
124
 
125
  submit_button.click(start_process, inputs=[input_audio], outputs=[transcipted_text, translated_text, translated_speech])
126