"""Gradio demo serving Mandarin text-to-speech with a Coqui TTS model.

Running this module downloads the model, builds a synthesizer from it and
launches the Gradio web interface.
"""

import tempfile

import gradio as gr
from TTS.utils.manage import ModelManager
from TTS.utils.synthesizer import Synthesizer

# Texts passed to the Gradio interface (title / description / article).
title = "Mandarin Text-to-Speech (TTS)"
description = (
    "Generate mandarin speech from text using a Tacotron2 model with Coqui TTS 🐸, "
    "a deep learning toolkit for Text-to-Speech."
)
# A plain double-quoted literal cannot span physical lines, so the line breaks
# are written as escapes; the resulting value is unchanged.
# NOTE(review): "Blog" and "Github Repo" look like they were meant to be
# links whose markup is missing here - confirm against the deployed app.
article = "\n\nBlog | Github Repo\n\n"

# Each inner list is one example row; there is a single input (the text).
examples = [
    ["语音合成是通过机械的、电子的方法产生人造语音的技术。"],
    ["李显龙总理表示,我国要达到像意大利的开放程度,几乎回到冠病疫情前的生活,还需要一段时间。"],
]

# Fetch the model once at start-up and build the synthesizer shared by all
# requests.
manager = ModelManager()
model_path, config_path, model_item = manager.download_model(
    "tts_models/zh-CN/baker/tacotron2-DDC-GST"
)
# Only the model and config paths are supplied; the remaining positional
# options are left as None.
synthesizer = Synthesizer(
    model_path,
    config_path,
    None,
    None,
    None,
)


def inference(text: str) -> str:
    """Synthesize ``text`` and return the path of the generated ``.wav`` file.

    Args:
        text: The text to convert to speech.

    Returns:
        The name (path) of a temporary ``.wav`` file holding the audio.
    """
    wavs = synthesizer.tts(text)
    # NOTE: an earlier variant returned the samples in memory as
    # (synthesizer.output_sample_rate, np.array(wavs).astype(np.float32));
    # it is disabled, which is why numpy is not imported.

    # delete=False keeps the file on disk after the handle is closed, so the
    # returned path remains usable once this function has returned.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        synthesizer.save_wav(wavs, fp)
        return fp.name


# Build the interface and start serving immediately.
gr.Interface(
    fn=inference,
    inputs=[
        gr.inputs.Textbox(
            label="Input",
            default="你好吗?我很好。",
        ),
    ],
    outputs=gr.outputs.Audio(label="Output"),
    title=title,
    description=description,
    article=article,
    examples=examples,
    enable_queue=True,
    allow_flagging=False,
).launch(debug=False)