import gradio as gr

import asr
import tts
import util

# Speech-to-text tab: transcribe Uyghur audio with a selectable ASR model.
mms_transcribe = gr.Interface(
    fn=asr.transcribe,
    inputs=[
        gr.Audio(),
        gr.Dropdown(
            choices=[model for model in asr.models_info],
            label="Select a Model for ASR",
            value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Textbox(label="Uyghur Arabic Transcription"),
        gr.Textbox(label="Uyghur Latin Transcription"),
    ],
    # examples=util.asr_examples,
    title="Speech-to-text",
    description="Transcribe Uyghur speech audio from a microphone or input file.",
    allow_flagging="never",
)

# Text-to-speech tab: synthesize audio from Uyghur text with a selectable TTS model.
mms_synthesize = gr.Interface(
    fn=tts.synthesize,
    inputs=[
        gr.Text(label="Input text"),
        gr.Dropdown(
            choices=[model for model in tts.models_info],
            label="Select a Model for TTS",
            value="Meta-MMS",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Audio(label="Generated Audio"),
    ],
    # examples=util.tts_examples,
    title="Text-to-speech",
    description="Generate audio from input Uyghur text.",
    allow_flagging="never",
)

# Combine the ASR and TTS interfaces into a single tabbed demo.
tabbed_interface = gr.TabbedInterface(
    [mms_transcribe, mms_synthesize],
    ["Speech-to-text", "Text-to-speech"],
)

with gr.Blocks() as demo:
    gr.Markdown("Comparison of STT and TTS models for the Uyghur language.")
    tabbed_interface.render()

if __name__ == "__main__":
    demo.queue()
    demo.launch()