Spaces:
Running
Running
File size: 2,188 Bytes
7bcf8d7 2fd6d82 7bcf8d7 ca2c701 7bcf8d7 886f6ba 7bcf8d7 886f6ba 7bcf8d7 ca2c701 7bcf8d7 886f6ba 7bcf8d7 ca2c701 7bcf8d7 fb6affe 7bcf8d7 f7f7f97 7bf63a2 f7f7f97 3554dbc 7bf63a2 7bcf8d7 ed7e000 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
import gradio as gr
import librosa
from asr import transcribe, ASR_EXAMPLES, ASR_LANGUAGES, ASR_NOTE
from tts import synthesize, TTS_EXAMPLES, TTS_LANGUAGES
from lid import identify, LID_EXAMPLES
# Speech-to-text tab: an audio clip and a language selection are handed to
# `transcribe`, and whatever it returns is shown as plain text.

# Dropdown entries are rendered as "<key> (<value>)" from ASR_LANGUAGES.
_asr_language_choices = [f"{k} ({v})" for k, v in ASR_LANGUAGES.items()]

# The pre-selected entry has to be one of the rendered choices. The former
# literal "eng English" contains no parentheses, so it could never match an
# entry in the list above; it is kept only as a fallback for the case where
# no entry is keyed by "eng".
_asr_default_language = next(
    (choice for choice in _asr_language_choices if choice.startswith("eng (")),
    "eng English",
)

mms_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(),
        gr.Dropdown(
            _asr_language_choices,
            label="Language",
            value=_asr_default_language,
        ),
        # NOTE: a "Use Language Model (if available)" checkbox was sketched
        # here previously but is not enabled.
    ],
    outputs="text",
    examples=ASR_EXAMPLES,
    title="Speech-to-text",
    description=(
        "Transcribe audio from a microphone or input file in your desired language."
    ),
    article=ASR_NOTE,
    allow_flagging="never",
)
# Text-to-speech tab: input text, a language selection and a speed factor are
# handed to `synthesize`; its two results are shown as an audio player and a
# text box labelled "Filtered text after removing OOVs".

# Dropdown entries are rendered as "<key> (<value>)" from TTS_LANGUAGES.
_tts_language_choices = [f"{k} ({v})" for k, v in TTS_LANGUAGES.items()]

# The pre-selected entry has to be one of the rendered choices. The former
# literal "eng English" contains no parentheses, so it could never match an
# entry in the list above; it is kept only as a fallback for the case where
# no entry is keyed by "eng".
_tts_default_language = next(
    (choice for choice in _tts_language_choices if choice.startswith("eng (")),
    "eng English",
)

mms_synthesize = gr.Interface(
    fn=synthesize,
    inputs=[
        gr.Text(label="Input text"),
        gr.Dropdown(
            _tts_language_choices,
            label="Language",
            value=_tts_default_language,
        ),
        # Speed factor offered in 0.1 steps between 0.1 and 4.0; 1.0 is the default.
        gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
    ],
    outputs=[
        gr.Audio(label="Generated Audio", type="numpy"),
        gr.Text(label="Filtered text after removing OOVs"),
    ],
    examples=TTS_EXAMPLES,
    title="Text-to-speech",
    description=("Generate audio in your desired language from input text."),
    allow_flagging="never",
)
# Language-identification tab: an audio clip is handed to `identify`, and the
# result is rendered as a label widget limited to the ten top classes.
mms_identify = gr.Interface(
    fn=identify,
    inputs=[
        gr.Audio(),
    ],
    outputs=gr.Label(num_top_classes=10),
    examples=LID_EXAMPLES,
    title="Language Identification",
    # User-facing text: corrected the typo "Identity" -> "Identify".
    description=("Identify the language of input audio."),
    allow_flagging="never",
)
# Each tab is declared once as a (label, interface) pair so that a label can
# never drift out of step with the interface it names.
_tabs = (
    ("Speech-to-text", mms_transcribe),
    ("Text-to-speech", mms_synthesize),
    ("Language Identification", mms_identify),
)

tabbed_interface = gr.TabbedInterface(
    [interface for _, interface in _tabs],
    [label for label, _ in _tabs],
)
# Page layout: a centred title, a one-line tagline, then the tabbed demos.
_title_markup = "<p align='center' style='font-size: 20px;'>MMS</p>"
_tagline_markup = "<center>Text-to-Speech, Speech-to-Text, and Language Recognition for 1,100+ languages.</center>"

with gr.Blocks() as demo:
    gr.Markdown(_title_markup)
    gr.HTML(_tagline_markup)
    # The tabbed interface was built outside this context, so it has to be
    # rendered explicitly to appear inside the page.
    tabbed_interface.render()
# Script entry point: enable the request queue, then start the app.
# (A stray trailing "|" after the launch call made this statement a syntax
# error; it has been removed.)
if __name__ == "__main__":
    demo.queue()
    demo.launch()