import gradio as gr
import torchaudio
from speechbrain.inference.vocoders import HIFIGAN
from speechbrain.tts import Tacotron2

# Load the pretrained LJSpeech models once, when this module is first executed:
# Tacotron2 produces a mel spectrogram from text and the HiFi-GAN vocoder turns
# that spectrogram into a waveform (see generate_speech).
# NOTE(review): both savedir values start with "/", i.e. they are absolute
# paths directly under the filesystem root - confirm that is intended and that
# the process is allowed to write there.
tts_model = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="/tmpdir_tacotron2",
)
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="/tmpdir_hifigan",
)

# Sample rate (Hz) of the audio produced by the LJSpeech HiFi-GAN vocoder.
_SAMPLE_RATE = 22050


# Function to generate speech
def generate_speech(text):
    """Synthesize speech for ``text`` in the format ``gr.Audio`` accepts.

    Args:
        text: The sentence to speak. ``None``, empty, or whitespace-only
            input is treated as "nothing to synthesize".

    Returns:
        ``None`` when there is no text to speak (Gradio then leaves the audio
        output empty); otherwise a ``(sample_rate, samples)`` tuple where
        ``samples`` is a 1-D NumPy array holding the mono waveform.
    """
    # Guard first so blank submissions never reach the models.
    if text is None or not text.strip():
        return None

    # encode_text returns (mel_output, mel_length, alignment); only the mel
    # spectrogram is needed, so the remaining values are discarded.
    mel_output, *_ = tts_model.encode_text(text)

    # Decode mel spectrogram to waveform using HIFIGAN vocoder
    waveform = hifi_gan.decode_batch(mel_output)

    # The vocoder output is batched as (batch, 1, time); with a single input
    # sentence, dropping the singleton dimensions leaves a plain 1-D signal.
    samples = waveform.squeeze().detach().cpu().numpy()

    # gr.Audio cannot play a bare torch tensor; it needs the sample rate
    # together with a NumPy array of samples.
    return _SAMPLE_RATE, samples

# Gradio UI: a single text box feeds generate_speech, whose result is shown in
# an audio player.
text_input = gr.Textbox(
    label="Input Text",
    placeholder="Enter text to convert to speech...",
)
speech_output = gr.Audio(label="Output Speech")
iface = gr.Interface(fn=generate_speech, inputs=text_input, outputs=speech_output)

# Start serving the interface
iface.launch()