Spaces: ixxan/uyghur-speech-models

Running

App Files Community

Irpan committed on Dec 24, 2024

Commit

448bf1b

1 Parent(s): 81e83c9

asr

Browse files

Files changed (1) hide show

app.py +52 -88

app.py CHANGED Viewed

@@ -3,100 +3,64 @@ import asr
 import tts
 import util
-# Define the Speech-to-Text tab
-def create_stt_tab():
-    with gr.Blocks() as mms_transcribe:
-        gr.Markdown("### Speech-To-Text")
-        with gr.Row():
-            audio_input = gr.Audio(
-                label="Record or Upload Uyghur Audio",
-                sources=["microphone", "upload"],
-                type="filepath",
-            )
-            model_selection_stt = gr.Dropdown(
-                choices=[model for model in asr.models_info],
-                label="Select a Model",
-                value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
-                interactive=True
-            )
-        with gr.Row():
-            arabic_output = gr.Textbox(label="Uyghur Arabic Transcription", interactive=False)
-            latin_output = gr.Textbox(label="Uyghur Latin Transcription", interactive=False)
-        with gr.Row():
-            stt_submit_btn = gr.Button("Submit")
-            stt_clear_btn = gr.Button("Clear")
-        # Example button to load examples
-        with gr.Row():
-            stt_examples = gr.Examples(
-                examples=util.asr_examples,
-                inputs=[audio_input, model_selection_stt],
-                outputs=[arabic_output, latin_output],
-                label="Examples"
-            )
-        # Define button functionality
-        stt_submit_btn.click(
-            asr.transcribe,
-            inputs=[audio_input, model_selection_stt],
-            outputs=[arabic_output, latin_output]
-        )
-        stt_clear_btn.click(
-            lambda: (None, None, None),  # Clear inputs and outputs
-            inputs=[],
-            outputs=[audio_input, arabic_output, latin_output]
-        )
-    return mms_transcribe
-# Define the Text-to-Speech tab
-def create_tts_tab():
-    with gr.Blocks() as mms_synthesize:
-        gr.Markdown("### Text-To-Speech")
-        with gr.Row():
-            input_text = gr.Text(label="Input text")
-            model_selection_tts = gr.Dropdown(
-                choices=[model for model in tts.models_info],
-                label="Select a Model",
-                value="Meta-MMS",
-                interactive=True
-            )
-        with gr.Row():
-            generated_audio = gr.Audio(label="Generated Audio", interactive=False)
-        with gr.Row():
-            tts_submit_btn = gr.Button("Submit")
-            tts_clear_btn = gr.Button("Clear")
-        # Example button to load examples
-        with gr.Row():
-            tts_examples = gr.Examples(
-                examples=util.tts_examples,
-                inputs=[input_text, model_selection_tts],
-                outputs=[generated_audio],
-                label="Examples"
-            )
-        # Define button functionality
-        tts_submit_btn.click(
-            tts.synthesize,
-            inputs=[input_text, model_selection_tts],
-            outputs=[generated_audio]
-        )
-        tts_clear_btn.click(
-            lambda: (None, None),  # Clear inputs and outputs
-            inputs=[],
-            outputs=[input_text, generated_audio]
         )
-    return mms_synthesize
-# Combine tabs into a Tabbed Interface
 with gr.Blocks() as demo:
-    gr.Markdown("### Uyghur Language Tools: STT and TTS")
-    with gr.TabbedInterface([create_stt_tab(), create_tts_tab()], ["Speech-To-Text", "Text-To-Speech"]):
-        pass
-# Run the app
 if __name__ == "__main__":
     demo.queue()
     demo.launch()

 import tts
 import util
+mms_transcribe = gr.Interface(
+    fn=asr.transcribe,
+    inputs=[
+        gr.Audio(
+            label="Record or Upload Uyghur Audio",
+            sources=["microphone", "upload"],
+            type="filepath",
+        ),
+        gr.Dropdown(
+            choices=[model for model in asr.models_info],
+            label="Select a Model",
+            value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
+            interactive=True
+        ),
+    ],
+    outputs=[
+        gr.Textbox(label="Uyghur Arabic Transcription"),
+        gr.Textbox(label="Uyghur Latin Transcription"),
+    ],
+    examples=util.asr_examples,
+    title="Speech-To-Text",
+    description=(
+        "Transcribe Uyghur speech audio from a microphone or input file."
+    ),
+    allow_flagging="never",
+)
+mms_synthesize = gr.Interface(
+    fn=tts.synthesize,
+    inputs=[
+        gr.Text(label="Input text"),
+        gr.Dropdown(
+            choices=[model for model in tts.models_info],
+            label="Select a Model",
+            value="Meta-MMS",
+            interactive=True
         )
+    ],
+    outputs=[
+        gr.Audio(label="Generated Audio"),
+    ],
+    examples=util.tts_examples,
+    title="Text-To-Speech",
+    description=(
+        "Generate audio from input Uyghur text."
+        ),
+    allow_flagging="never",
+)
+tabbed_interface = gr.TabbedInterface(
+    [mms_transcribe, mms_synthesize],
+    ["Speech-To-Text", "Text-To-Speech"],
+)
 with gr.Blocks() as demo:
+    gr.Markdown("Comparision of STT and TTS models for Uyghur language.")
+    tabbed_interface.render()
 if __name__ == "__main__":
     demo.queue()
     demo.launch()