"""Gradio demo with a Uyghur speech-to-text tab and a text-to-speech tab.

The module builds two ``gr.Interface`` objects (``mms_transcribe`` and
``mms_synthesize``), groups them in ``tabbed_interface``, renders that inside
the ``demo`` Blocks app, and launches ``demo`` when run as a script.
"""

import gradio as gr

import asr
import tts
import util

# ---------------------------------------------------------------------------
# Speech-to-text tab
# ---------------------------------------------------------------------------

# Audio input plus a model selector. The selector offers every entry of
# ``asr.models_info`` followed by an extra "Compare All Models" option.
_asr_inputs = [
    gr.Audio(),
    gr.Dropdown(
        choices=[*asr.models_info, "Compare All Models"],
        label="Select Model for ASR",
        value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
        interactive=True,
    ),
]

# Two text outputs: one per script (Arabic and Latin).
_asr_outputs = [
    gr.Textbox(label="Uyghur Arabic Transcription"),
    gr.Textbox(label="Uyghur Latin Transcription"),
]

# NOTE: the ``examples=ASR_EXAMPLES`` and ``article=ASR_NOTE`` options are
# commented out in this version of the app and therefore not passed.
mms_transcribe = gr.Interface(
    fn=asr.transcribe,
    inputs=_asr_inputs,
    outputs=_asr_outputs,
    title="Speech-to-text",
    description="Transcribe Uyghur speech audio from a microphone or input file.",
    allow_flagging="never",
)

# ---------------------------------------------------------------------------
# Text-to-speech tab
# ---------------------------------------------------------------------------

# Free-text input plus a model selector listing every entry of
# ``tts.models_info``.
_tts_inputs = [
    gr.Text(label="Input text"),
    gr.Dropdown(
        choices=list(tts.models_info),
        label="Select Model for TTS",
        value="Meta-MMS",
        interactive=True,
    ),
]

_tts_outputs = [gr.Audio(label="Generated Audio")]

mms_synthesize = gr.Interface(
    fn=tts.synthesize,
    inputs=_tts_inputs,
    outputs=_tts_outputs,
    examples=util.tts_examples,
    title="Text-to-speech",
    description="Generate audio from input text.",
    allow_flagging="never",
)

# ---------------------------------------------------------------------------
# App assembly
# ---------------------------------------------------------------------------

# Tab order and tab titles are positional: the i-th title labels the i-th
# interface.
_tab_interfaces = [mms_transcribe, mms_synthesize]
_tab_titles = ["Speech-to-text", "Text-to-speech"]
tabbed_interface = gr.TabbedInterface(_tab_interfaces, _tab_titles)

# Wrap the tabbed interface in a top-level Blocks app.
with gr.Blocks() as demo:
    tabbed_interface.render()

if __name__ == "__main__":
    # Set up the queue first, then start the app.
    demo.queue()
    demo.launch()