"""Gradio demo: run recorded speech through a Hugging Face pipeline, then
synthesize the resulting text with Coqui TTS in a user-selected language."""

import tempfile

import gradio as gr
from neon_tts_plugin_coqui import CoquiTTS
from transformers import pipeline

# Speech model loaded once at start-up and reused for every request.
pipe = pipeline(model="Yuyang2022/yue")  # change to "your-username/the-name-you-picked"

# Language codes offered in the UI, taken from the TTS plugin's language table.
LANGUAGES = list(CoquiTTS.langs.keys())
coquiTTS = CoquiTTS()


def audio_tts(audio, language: str):
    """Convert recorded audio to text and speak that text back as a WAV file.

    Args:
        audio: Path to the recorded audio file (the Gradio input below is
            configured with ``type="filepath"``).
        language: Language code passed to the TTS engine; one of ``LANGUAGES``.

    Returns:
        Path of a temporary ``.wav`` file containing the synthesized speech.
        The file is created with ``delete=False``, so it is not removed
        automatically when it is closed.
    """
    text = pipe(audio)["text"]
    # delete=False keeps the file on disk after the ``with`` block closes it,
    # so the returned path is still valid when Gradio reads it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(text, fp, speaker={"language": language})
        return fp.name


inputs = [
    gr.Audio(source="microphone", type="filepath"),
    # The choices list must come before the keyword arguments: a positional
    # argument after a keyword argument is a SyntaxError.
    gr.Dropdown(LANGUAGES, label="Language", value="en"),
]
outputs = gr.Audio(label="Output")

# NOTE(review): "speeh" in the title looks like a typo for "speech"; the
# user-facing string is left untouched here -- confirm and fix separately.
demo = gr.Interface(
    fn=audio_tts,
    inputs=inputs,
    outputs=outputs,
    title="translation-speeh to speech",
    description="Realtime demo for speech translation.",
)

demo.launch()