frogcho123 committed on
Commit
bc7920f
1 Parent(s): b2d1906

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import whisper
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  from gtts import gTTS
5
- import sounddevice as sd
6
  import scipy.io.wavfile as wav
7
  import os
8
 
@@ -36,18 +36,19 @@ def translate_speech_to_speech(input_audio):
36
 
37
  # Text-to-speech (TTS) code from the first code snippet
38
  tts = gTTS(text=translated_text, lang=to_lang)
39
- output_file = "translated_speech.mp3"
40
  tts.save(output_file)
41
 
42
  # Load the translated audio and return as an output
43
- translated_audio = open(output_file, "rb")
 
44
 
45
- return translated_audio
46
 
47
  title = "Speech-to-Speech Translator"
48
 
49
  input_audio = gr.inputs.Audio(type=["mp3", "wav"])
50
- output_audio = gr.outputs.Audio(type=["mp3", "wav"])
51
 
52
  stt_demo = gr.Interface(
53
  fn=translate_speech_to_speech,
@@ -61,3 +62,4 @@ if __name__ == "__main__":
61
  stt_demo.launch()
62
 
63
 
 
 
2
  import whisper
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  from gtts import gTTS
5
+ import soundfile as sf
6
  import scipy.io.wavfile as wav
7
  import os
8
 
 
36
 
37
  # Text-to-speech (TTS) code from the first code snippet
38
  tts = gTTS(text=translated_text, lang=to_lang)
39
+ output_file = "translated_speech.wav"
40
  tts.save(output_file)
41
 
42
  # Load the translated audio and return as an output
43
+ translated_audio, sr = sf.read(output_file, dtype="float32")
44
+ translated_audio = (translated_audio * 32767).astype("int16")
45
 
46
+ return translated_audio, sr
47
 
48
  title = "Speech-to-Speech Translator"
49
 
50
  input_audio = gr.inputs.Audio(type=["mp3", "wav"])
51
+ output_audio = gr.outputs.Audio(type=["mp3", "wav"], sample_rate=44100)
52
 
53
  stt_demo = gr.Interface(
54
  fn=translate_speech_to_speech,
 
62
  stt_demo.launch()
63
 
64
 
65
+