Spaces:

ixxan
/

uyghur-speech-models

Running

App Files Community

Irpan committed 14 days ago

Commit

9db718b

•

1 Parent(s): 422a095

asr

Browse files

Files changed (2) hide show

app.py +71 -41
util.py +3 -3

app.py CHANGED Viewed

@@ -3,64 +3,94 @@ import asr
 import tts
 import util
-mms_transcribe = gr.Interface(
-    fn=asr.transcribe,
-    inputs=[
-        gr.Audio(
             label="Record or Upload Uyghur Audio",
             sources=["microphone", "upload"],
             type="filepath",
-        ),
-        gr.Dropdown(
             choices=[model for model in asr.models_info],
             label="Select a Model",
             value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
             interactive=True
-        ),
-    ],
-    outputs=[
-        gr.Textbox(label="Uyghur Arabic Transcription"),
-        gr.Textbox(label="Uyghur Latin Transcription"),
-    ],
-    examples=util.asr_examples,
-    title="Speech-To-Text",
-    description=(
-        "Transcribe Uyghur speech audio from a microphone or input file."
-    ),
-    allow_flagging="never",
-)
-mms_synthesize = gr.Interface(
-    fn=tts.synthesize,
-    inputs=[
-        gr.Text(label="Input text"),
-        gr.Dropdown(
             choices=[model for model in tts.models_info],
             label="Select a Model",
             value="Meta-MMS",
             interactive=True
         )
-    ],
-    outputs=[
-        gr.Audio(label="Generated Audio"),
-    ],
-    examples=util.tts_examples,
-    title="Text-To-Speech",
-    description=(
-        "Generate audio from input Uyghur text."
-        ),
-    allow_flagging="never",
-)
-tabbed_interface = gr.TabbedInterface(
-    [mms_transcribe, mms_synthesize],
-    ["Speech-To-Text", "Text-To-Speech"],
-)
 with gr.Blocks() as demo:
-    gr.Markdown("Comparision of STT and TTS models for Uyghur language.")
-    tabbed_interface.render()
 if __name__ == "__main__":
     demo.queue()
     demo.launch()

 import tts
 import util
+# Define the Speech-to-Text tab
+# Built as a standalone Blocks layout; it is placed into a tab of the combined app below.
+with gr.Blocks() as mms_transcribe:
+    gr.Markdown("### Speech-To-Text")
+    # Input row: the audio clip and the ASR model to run on it.
+    with gr.Row():
+        audio_input = gr.Audio(
             label="Record or Upload Uyghur Audio",
             sources=["microphone", "upload"],
             type="filepath",
+        )
+        model_selection_stt = gr.Dropdown(
             choices=[model for model in asr.models_info],
             label="Select a Model",
             value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
             interactive=True
+        )
+    # Output row: two non-editable text boxes, one per script (Arabic and Latin).
+    with gr.Row():
+        arabic_output = gr.Textbox(label="Uyghur Arabic Transcription", interactive=False)
+        latin_output = gr.Textbox(label="Uyghur Latin Transcription", interactive=False)
+    with gr.Row():
+        stt_submit_btn = gr.Button("Submit")
+        stt_clear_btn = gr.Button("Clear")
+    # Example button to load examples
+    # NOTE(review): `outputs` is given but no `fn`, so presumably selecting an example
+    # only fills the inputs and the user still has to press Submit; confirm against
+    # the Gradio version in use.
+    with gr.Row():
+        stt_examples = gr.Examples(
+            examples=util.asr_examples,
+            inputs=[audio_input, model_selection_stt],
+            outputs=[arabic_output, latin_output],
+            label="Examples"
+        )
+    # Define button functionality
+    # Submit passes (audio, model name) to asr.transcribe and writes its two results
+    # into the Arabic and Latin text boxes, in that order.
+    stt_submit_btn.click(
+        asr.transcribe,
+        inputs=[audio_input, model_selection_stt],
+        outputs=[arabic_output, latin_output]
+    )
+    # Clear resets the audio input and both transcriptions; the model selection is
+    # not among the outputs, so it keeps its current value.
+    stt_clear_btn.click(
+        lambda: (None, None, None),  # Clear inputs and outputs
+        inputs=[],
+        outputs=[audio_input, arabic_output, latin_output]
+    )
+# Define the Text-to-Speech tab
+# Built as a standalone Blocks layout; it is placed into a tab of the combined app below.
+with gr.Blocks() as mms_synthesize:
+    gr.Markdown("### Text-To-Speech")
+    # Input row: the text to speak and the TTS model to use.
+    with gr.Row():
+        input_text = gr.Text(label="Input text")
+        model_selection_tts = gr.Dropdown(
             choices=[model for model in tts.models_info],
             label="Select a Model",
             value="Meta-MMS",
             interactive=True
         )
+    # Output row: a non-interactive audio component for the synthesized speech.
+    with gr.Row():
+        generated_audio = gr.Audio(label="Generated Audio", interactive=False)
+    with gr.Row():
+        tts_submit_btn = gr.Button("Submit")
+        tts_clear_btn = gr.Button("Clear")
+    # Example button to load examples
+    # NOTE(review): `outputs` is given but no `fn`, so presumably selecting an example
+    # only fills the inputs and the user still has to press Submit; confirm against
+    # the Gradio version in use.
+    with gr.Row():
+        tts_examples = gr.Examples(
+            examples=util.tts_examples,
+            inputs=[input_text, model_selection_tts],
+            outputs=[generated_audio],
+            label="Examples"
+        )
+    # Define button functionality
+    # Submit passes (text, model name) to tts.synthesize and sends its result to the
+    # audio component.
+    tts_submit_btn.click(
+        tts.synthesize,
+        inputs=[input_text, model_selection_tts],
+        outputs=[generated_audio]
+    )
+    # Clear resets the text box and the generated audio; the model selection is not
+    # among the outputs, so it keeps its current value.
+    tts_clear_btn.click(
+        lambda: (None, None),  # Clear inputs and outputs
+        inputs=[],
+        outputs=[input_text, generated_audio]
+    )
+# Combine tabs into a Tabbed Interface
 with gr.Blocks() as demo:
+    gr.Markdown("### Uyghur Language Tools: STT and TTS")
+    with gr.TabbedInterface([mms_transcribe, mms_synthesize], ["Speech-To-Text", "Text-To-Speech"]):
+        pass
+# Run the app
 if __name__ == "__main__":
     demo.queue()
     demo.launch()

util.py CHANGED Viewed

@@ -8,10 +8,10 @@ asr_examples = [['examples/1.wav', 'ixxan/wav2vec2-large-mms-1b-uyghur-latin'],
                 ['examples/2.wav', 'ixxan/wav2vec2-large-mms-1b-uyghur-latin']]
 tts_examples = [
-    ["مەكتەپكە بارغاندا تېخىمۇ بىلىملىك بولۇمەن.", "Meta-MMS"],
     ["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Meta-MMS"],
-    ["مېنىڭ قەلبىمنى كەڭ قىلغىن", "Meta-MMS"],
     ["Bu putbol musabiqisining axirlishishi", "Meta-MMS"],
     ["Yaxshimusiz?", "Meta-MMS"],
-    ["Rehmet sizge!", "Meta-MMS"]
 ]

                 ['examples/2.wav', 'ixxan/wav2vec2-large-mms-1b-uyghur-latin']]
 tts_examples = [
+    # Each row is [input text, TTS model name]; every text is listed once per model
+    # so the same sentence can be tried with both models.
     ["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "Meta-MMS"],
+    ["قىلىچ قان تامغۇزسا، بەگ ئەل ئالىدۇ؛ قەلەمدىن سىياھتانسا، ئالتۇن كېلىدۇ.", "IS2AI-TurkicTTS"],
     ["Bu putbol musabiqisining axirlishishi", "Meta-MMS"],
+    ["Bu putbol musabiqisining axirlishishi", "IS2AI-TurkicTTS"],
     ["Yaxshimusiz?", "Meta-MMS"],
+    ["Yaxshimusiz?", "IS2AI-TurkicTTS"]
 ]