"""Gradio demo with Uyghur speech-to-text and text-to-speech tabs.

Builds two standalone ``gr.Blocks`` layouts (``mms_transcribe`` and
``mms_synthesize``) and combines them into the top-level ``demo`` app,
which is launched when this file is run as a script.
"""

import gradio as gr

import asr
import tts
import util

# Define the Speech-to-Text tab
with gr.Blocks() as mms_transcribe:
    gr.Markdown("### Speech-To-Text")

    with gr.Row():
        # type="filepath" hands the handler a path on disk rather than raw samples.
        audio_input = gr.Audio(
            label="Record or Upload Uyghur Audio",
            sources=["microphone", "upload"],
            type="filepath",
        )
        # Choices come from iterating asr.models_info.
        # NOTE(review): the default value is assumed to be one of those
        # entries -- confirm against asr.models_info.
        model_selection_stt = gr.Dropdown(
            choices=list(asr.models_info),
            label="Select a Model",
            value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
            interactive=True,
        )

    with gr.Row():
        arabic_output = gr.Textbox(
            label="Uyghur Arabic Transcription", interactive=False
        )
        latin_output = gr.Textbox(
            label="Uyghur Latin Transcription", interactive=False
        )

    with gr.Row():
        stt_submit_btn = gr.Button("Submit")
        stt_clear_btn = gr.Button("Clear")

    # Example button to load examples
    with gr.Row():
        stt_examples = gr.Examples(
            examples=util.asr_examples,
            inputs=[audio_input, model_selection_stt],
            outputs=[arabic_output, latin_output],
            label="Examples",
        )

    # Define button functionality: asr.transcribe is wired to receive the
    # audio file path and the selected model, and to fill both text boxes.
    stt_submit_btn.click(
        asr.transcribe,
        inputs=[audio_input, model_selection_stt],
        outputs=[arabic_output, latin_output],
    )

    # Clear the audio input and both outputs; the model selection is
    # intentionally left untouched.
    stt_clear_btn.click(
        lambda: (None, None, None),
        inputs=[],
        outputs=[audio_input, arabic_output, latin_output],
    )

# Define the Text-to-Speech tab
with gr.Blocks() as mms_synthesize:
    gr.Markdown("### Text-To-Speech")

    with gr.Row():
        input_text = gr.Text(label="Input text")
        # Choices come from iterating tts.models_info.
        # NOTE(review): the default value is assumed to be one of those
        # entries -- confirm against tts.models_info.
        model_selection_tts = gr.Dropdown(
            choices=list(tts.models_info),
            label="Select a Model",
            value="Meta-MMS",
            interactive=True,
        )

    with gr.Row():
        generated_audio = gr.Audio(label="Generated Audio", interactive=False)

    with gr.Row():
        tts_submit_btn = gr.Button("Submit")
        tts_clear_btn = gr.Button("Clear")

    # Example button to load examples
    with gr.Row():
        tts_examples = gr.Examples(
            examples=util.tts_examples,
            inputs=[input_text, model_selection_tts],
            outputs=[generated_audio],
            label="Examples",
        )

    # Define button functionality: tts.synthesize is wired to receive the
    # text and the selected model, and to fill the audio player.
    tts_submit_btn.click(
        tts.synthesize,
        inputs=[input_text, model_selection_tts],
        outputs=[generated_audio],
    )

    # Clear the text input and the generated audio; the model selection is
    # intentionally left untouched.
    tts_clear_btn.click(
        lambda: (None, None),
        inputs=[],
        outputs=[input_text, generated_audio],
    )

# Combine tabs into a Tabbed Interface
with gr.Blocks() as demo:
    gr.Markdown("### Uyghur Language Tools: STT and TTS")
    # Nothing is nested inside the tabbed interface, so it is constructed
    # directly rather than opened as a context manager around an empty body.
    # NOTE(review): relies on TabbedInterface placing itself in the enclosing
    # Blocks on construction -- confirm against the installed Gradio version.
    gr.TabbedInterface(
        [mms_transcribe, mms_synthesize],
        ["Speech-To-Text", "Text-To-Speech"],
    )

# Run the app
if __name__ == "__main__":
    demo.queue()
    demo.launch()