"""Gradio web app for text-to-speech using sherpa-onnx pretrained models."""
import logging
import os
import time
import uuid

import gradio as gr
import soundfile as sf

from model import get_pretrained_model, language_to_models


def update_model_dropdown(language: str):
    """Return a refreshed model Dropdown for the selected language.

    Args:
        language: key into ``language_to_models``.

    Returns:
        A ``gr.Dropdown`` listing that language's models, with the first
        model pre-selected.

    Raises:
        ValueError: if ``language`` has no registered models.
    """
    if language in language_to_models:
        choices = language_to_models[language]
        return gr.Dropdown(
            choices=choices,
            value=choices[0],
            interactive=True,
        )
    raise ValueError(f"Unsupported language: {language}")


def process(language, repo_id, text, sid, speed):
    """Synthesize ``text`` to a wav file and return its filename.

    Args:
        language: selected language (unused here; kept for the Gradio wiring).
        repo_id: model repository id passed to ``get_pretrained_model``.
        text: text to convert to speech.
        sid: speaker id as a string; valid only for multi-speaker models.
        speed: speaking-rate multiplier.

    Returns:
        Path of the generated 16-bit PCM wav file (a fresh UUID name in the
        current working directory).
    """
    # Lazy %-style args so formatting only happens if INFO is enabled.
    logging.info("Input text: %s, SID: %s, Speed: %s", text, sid, speed)
    sid = int(sid)
    tts = get_pretrained_model(repo_id, speed)

    start = time.time()
    audio = tts.generate(text, sid=sid)
    duration = len(audio.samples) / audio.sample_rate
    elapsed_seconds = time.time() - start
    # Real-time factor: processing time divided by produced audio duration.
    rtf = elapsed_seconds / duration

    info = f""" Wave duration: {duration:.3f} s
Processing time: {elapsed_seconds:.3f} s
RTF: {rtf:.3f}
"""
    logging.info(info)

    filename = f"{uuid.uuid4()}.wav"
    sf.write(filename, audio.samples, samplerate=audio.sample_rate, subtype="PCM_16")
    return filename


# Interface layout
demo = gr.Blocks()

with demo:
    gr.Markdown("# Text to Voice")
    gr.Markdown("High Fidelity TTS. Visit ruslanmv.com for more information.")

    language_choices = list(language_to_models.keys())
    language_radio = gr.Radio(
        label="Language",
        choices=language_choices,
        value=language_choices[0],
    )
    model_dropdown = gr.Dropdown(
        label="Select a model",
        choices=language_to_models[language_choices[0]],
    )
    # Refresh the model list whenever the language selection changes.
    language_radio.change(
        update_model_dropdown,
        inputs=language_radio,
        outputs=model_dropdown,
    )

    input_text = gr.Textbox(lines=10, label="Enter text to convert to speech")
    input_sid = gr.Textbox(
        label="Speaker ID",
        value="0",
        placeholder="Valid only for multi-speaker model",
    )
    input_speed = gr.Slider(
        minimum=0.1,
        maximum=10,
        value=1,
        step=0.1,
        label="Speed (larger->faster; smaller->slower)",
    )
    output_audio = gr.Audio(label="Generated audio")

    input_button = gr.Button("Submit")
    input_button.click(
        process,
        inputs=[language_radio, model_dropdown, input_text, input_sid, input_speed],
        outputs=[output_audio],
    )


def download_espeak_ng_data():
    """Fetch and unpack espeak-ng data into /tmp (required by the TTS models).

    NOTE(review): runs a fixed shell command via os.system; safe only because
    the command contains no user input.
    """
    os.system(
        """ cd /tmp wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2 tar xf espeak-ng-data.tar.bz2 """
    )


if __name__ == "__main__":
    download_espeak_ng_data()
    logging.basicConfig(level=logging.INFO)
    demo.launch()