Spaces:

ixxan
/

uyghur-speech-models

Running

File size: 1,503 Bytes

20aa839
3a18b3b
bef8623
a4939e4
20aa839
 
 
3a18b3b
20aa839
cafc4cf
3a18b3b
ef107e3
3a18b3b
 
 
1dfec92
 
 
 
 
20aa839
e7164c6
20aa839
 
1dfec92
20aa839
f23608f
20aa839
 
 
bef8623
 
 
 
1dfec92
7b99c0e
b9ff9e2
 
 
 
bef8623
 
30e5da4
bef8623
 
 
 
 
 
20aa839
 
bef8623
 
20aa839

import gradio as gr
import asr
import tts
# from tts import synthesize


# Speech-to-text tab: hands the audio input and the dropdown selection to
# asr.transcribe and displays what it returns in two text boxes.
mms_transcribe = gr.Interface(
    title="Speech-to-text",
    description="Transcribe Uyghur speech audio from a microphone or input file.",
    fn=asr.transcribe,
    inputs=[
        gr.Audio(),
        gr.Dropdown(
            label="Select Model for ASR",
            # Every entry of asr.models_info, followed by one extra option.
            choices=[*asr.models_info, "Compare All Models"],
            value="ixxan/wav2vec2-large-mms-1b-uyghur-latin",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Textbox(label="Uyghur Arabic Transcription"),
        gr.Textbox(label="Uyghur Latin Transcription"),
    ],
    # examples=ASR_EXAMPLES,
    # article=ASR_NOTE,
    allow_flagging="never",
)

# Text-to-speech tab: hands the typed text and the dropdown selection to
# tts.synthesize and plays what it returns in an audio component.
mms_synthesize = gr.Interface(
    title="Text-to-speech",
    description="Generate audio from input text.",
    fn=tts.synthesize,
    inputs=[
        gr.Text(label="Input text"),
        gr.Dropdown(
            label="Select Model for TTS",
            # One dropdown option per entry of tts.models_info.
            choices=list(tts.models_info),
            value="Meta-MMS",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Audio(label="Generated Audio"),
    ],
    # examples=TTS_EXAMPLES,
    allow_flagging="never",
)

# Combine the two interfaces into one tabbed UI; tab_names[i] labels
# interface_list[i].
tabbed_interface = gr.TabbedInterface(
    interface_list=[mms_transcribe, mms_synthesize],
    tab_names=["Speech-to-text", "Text-to-speech"],
)

# Top-level app container; the tabbed interface is rendered inside it.
demo = gr.Blocks()
with demo:
    tabbed_interface.render()

if __name__ == "__main__":
    # Enable the request queue, then start the app (script execution only).
    demo.queue().launch()