"""Gradio demo: text-to-speech with SpeechBrain Tacotron2 + HiFi-GAN (LJSpeech)."""

import gradio as gr
import torchaudio
from speechbrain.inference.TTS import Tacotron2
from speechbrain.inference.vocoders import HIFIGAN

# Output sample rate of the LJSpeech Tacotron2/HiFi-GAN checkpoints, as stated
# on their SpeechBrain model cards.
SAMPLE_RATE = 22050

# Initialize Tacotron2 TTS model and HIFIGAN vocoder.
# The cache directories are relative to the working directory: the previous
# absolute "/tmpdir_*" paths pointed at the filesystem root, which is normally
# not writable for an unprivileged process.
tts_model = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tacotron2",
)
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="tmpdir_hifigan",
)


def generate_speech(text: str) -> tuple:
    """Synthesize speech for *text* and return it in Gradio's audio format.

    Args:
        text: The sentence(s) to convert to speech.

    Returns:
        A ``(sample_rate, samples)`` tuple, where ``samples`` is a 1-D NumPy
        array holding the mono waveform. This is the numpy form accepted by
        ``gr.Audio`` as an output value.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Surface a readable message in the UI instead of feeding the model
        # an empty sequence.
        raise gr.Error("Please enter some text to convert to speech.")

    # Encode text using Tacotron2. encode_text returns three values
    # (mel spectrogram, mel lengths, alignments); only the first is needed.
    mel_output, _mel_length, _alignment = tts_model.encode_text(text)

    # Decode mel spectrogram to waveform using HIFIGAN vocoder.
    waveform = hifi_gan.decode_batch(mel_output)

    # Drop the singleton batch/channel dimensions so Gradio receives a flat
    # (samples,) array, and move the data off the torch device.
    samples = waveform.squeeze().detach().cpu().numpy()
    return SAMPLE_RATE, samples


# Interface for Gradio
iface = gr.Interface(
    fn=generate_speech,
    inputs=gr.Textbox(
        label="Input Text",
        placeholder="Enter text to convert to speech...",
    ),
    outputs=gr.Audio(label="Output Speech"),
)

# Launch the Gradio interface only when run as a script, so importing this
# module (e.g. to reuse generate_speech) does not start a blocking server.
if __name__ == "__main__":
    iface.launch()