Spaces:
Running
Running
File size: 2,732 Bytes
20aa839 3a18b3b bef8623 660776b 20aa839 448bf1b c492cbb 448bf1b 03dc51e 9a0faf6 03dc51e 448bf1b 668fb3c 448bf1b 9db718b 448bf1b c492cbb 448bf1b 9db718b 448bf1b 03dc51e 9a0faf6 03dc51e 448bf1b 668fb3c 448bf1b 20aa839 448bf1b 501d3b8 448bf1b 20aa839 3493c42 ca72173 3493c42 ca72173 3493c42 6a213c1 ffbaf39 da61140 3493c42 448bf1b 20aa839 9a0faf6 20aa839 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
import gradio as gr
import asr
import tts
import util
# Speech-to-text tab: feeds recorded or uploaded audio plus the selected model
# name to asr.transcribe and shows the two returned values in text boxes.
mms_transcribe = gr.Interface(
    fn=asr.transcribe,
    inputs=[
        gr.Audio(
            label="Record or Upload Uyghur Audio",
            sources=["microphone", "upload"],
            type="filepath",  # hand the callback a file path, not raw samples
        ),
        gr.Dropdown(
            # Every item yielded by iterating asr.models_info becomes a choice.
            choices=list(asr.models_info),
            label="Select a Model",
            value="Ixxan-FineTuned-MMS",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Textbox(label="Uyghur Arabic Transcription"),
        gr.Textbox(label="Uyghur Latin Transcription"),
    ],
    examples=util.asr_examples,
    # Written as adjacent literals so the text carries no source indentation.
    description=(
        "\n"
        "Transcribe Uyghur audio from a microphone or input file.\n"
        "Click on examples below for sample usage.\n"
        "Please keep the audio length under 10 seconds for faster processing since this space is running on CPU basic.\n"
    ),
    article=util.asr_notes,
    allow_flagging="never",
)
# Text-to-speech tab: feeds the typed text plus the selected model name to
# tts.synthesize and plays back the returned audio.
mms_synthesize = gr.Interface(
    fn=tts.synthesize,
    inputs=[
        gr.Text(label="Input text"),
        gr.Dropdown(
            # Every item yielded by iterating tts.models_info becomes a choice.
            choices=list(tts.models_info),
            label="Select a Model",
            value="Ixxan-FineTuned-MMS",
            interactive=True,
        ),
    ],
    outputs=[
        gr.Audio(label="Generated Audio"),
    ],
    examples=util.tts_examples,
    # Written as adjacent literals so the text carries no source indentation.
    description=(
        "\n"
        "Generate audio from input Uyghur text.\n"
        "Click on examples below for sample usage.\n"
        "Please keep the input text length under 200 characters for faster processing since this space is running on CPU basic.\n"
    ),
    article=util.tts_notes,
    allow_flagging="never",
)
# Bundle both interfaces into one tabbed widget; tab labels pair with the
# interfaces by position (first label -> first interface, and so on).
tabbed_interface = gr.TabbedInterface(
    interface_list=[mms_transcribe, mms_synthesize],
    tab_names=["Speech-To-Text", "Text-To-Speech"],
)
# Top-level page layout: an HTML/Markdown header with project links, followed
# by the tabbed speech-to-text / text-to-speech widget.
with gr.Blocks() as demo:
    # The literal's contents are kept flush-left so the text passed to
    # gr.Markdown is exactly the original, with no added leading whitespace.
    gr.Markdown(
        """
<h1 style="text-align: center; font-size: 28px; color: #4A90E2;">
Uyghur Speech-To-Text (STT) and Text-To-Speech (TTS) Models
</h1>
<p style="text-align: center; font-size: 16px; color: #555;">
Comparisons of existing and fine-tuned speech models for transcribing and synthesizing Uyghur speech.
</p>
To learn more about Uyghur Speech Technology, please check out my [blog post](https://ixxan.github.io/blog/low-resource-speech-uyghur).
To see the model fine-tuning code, please visit my [GitHub repository](https://github.com/ixxan/ug-speech).
"""
    )
    # tabbed_interface is constructed outside this context, so it is rendered
    # explicitly here to place it in this layout below the header.
    tabbed_interface.render()
if __name__ == "__main__":
    # Enable request queuing with explicit limits: a default concurrency limit
    # of 2 for event handlers and at most 20 requests held in the queue.
    demo.queue(default_concurrency_limit=2, max_size=20)
    demo.launch()