# Harshad Bhandwaldar
# model added
# 97970d4
import importlib
import os
import subprocess
import sys

# NeMo is not assumed to be preinstalled in the runtime image, so it is
# installed on demand.  Try the import first so an already-provisioned
# environment does not pay for a pip run on every start-up.
try:
    import nemo.collections.asr as nemo_asr
except ImportError:
    # Invoke pip through the interpreter that is running this script so the
    # package lands in the same environment, and fail loudly (check=True)
    # instead of continuing into a confusing ModuleNotFoundError later.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "nemo_toolkit[all]"],
        check=True,
    )
    # Make the import system notice the freshly installed packages.
    importlib.invalidate_caches()
    import nemo.collections.asr as nemo_asr

import gradio as gr
# Module-level ASR model shared by both transcription callbacks.  It is
# created once at import time from the pretrained checkpoint named below.
_asr_model_cls = nemo_asr.models.EncDecCTCModel
model = _asr_model_cls.from_pretrained(model_name="stt_en_quartznet15x5")
def speech_file(x):
    """Transcribe an uploaded audio file and return its text.

    Args:
        x: Path of the audio file to transcribe, or a falsy value
            (``None`` / empty string) when no file was provided.

    Returns:
        The transcript as a plain string.  An empty string is returned when
        no audio was provided or the model produced no result.
    """
    if not x:
        # Nothing was uploaded; do not ask the model to open a file
        # literally named "None".
        return ""

    results = model.transcribe([str(x)])
    if not results:
        return ""

    # One path went in, so only the first entry is relevant.  Returning the
    # entry rather than the whole list keeps the textbox from showing a
    # list repr such as "['hello']".
    first = results[0]
    # NOTE(review): some NeMo releases appear to return hypothesis objects
    # exposing ``.text`` instead of plain strings; accept both - confirm
    # against the installed NeMo version.
    return getattr(first, "text", first)
def speech_record(x):
    """Transcribe a microphone recording and return its text.

    Args:
        x: Path of the recorded audio file, or a falsy value
            (``None`` / empty string) when nothing was recorded.

    Returns:
        The transcript as a plain string.  An empty string is returned when
        no recording was provided or the model produced no result.
    """
    if not x:
        # No recording available; skip the model call instead of passing
        # it the literal path "None".
        return ""

    results = model.transcribe([str(x)])
    if not results:
        return ""

    # A single path was submitted, so unwrap the single result instead of
    # handing the whole list to the output textbox.
    first = results[0]
    # NOTE(review): some NeMo releases appear to return hypothesis objects
    # exposing ``.text`` instead of plain strings; accept both - confirm
    # against the installed NeMo version.
    return getattr(first, "text", first)
# Gradio UI: two tabs (file upload and microphone recording), each with an
# audio input, a transcription textbox and a button that runs the model.
with gr.Blocks() as demo:
    # Page title (model name spelling corrected: "QuartzNet").
    gr.Markdown("## Speech to Text - NVIDIA QuartzNet15x5 (English)")

    with gr.Tab("Audio File"):
        # NOTE(review): `.style(...)` and `source=` are kept as written;
        # whether they are still accepted depends on the installed Gradio
        # version - confirm before upgrading.
        with gr.Row().style(equal_height=True):
            audio_input2 = gr.Audio(label="Audio File", type="filepath")
            text_output2 = gr.Textbox(label="Transcription", show_label=False)
        file_button = gr.Button("Transcribe")

    with gr.Tab("Record"):
        with gr.Row().style(equal_height=True):
            audio_input3 = gr.Audio(
                label="Input Audio", source="microphone", type="filepath"
            )
            text_output3 = gr.Textbox(label="Transcription", show_label=False)
        rec_button = gr.Button("Transcribe")

    # Event wiring stays inside the Blocks context: each button passes its
    # tab's audio path to the matching callback and writes the result to
    # that tab's textbox.
    file_button.click(speech_file, inputs=audio_input2, outputs=text_output2)
    rec_button.click(speech_record, inputs=audio_input3, outputs=text_output3)

# Start the web server for the demo.
demo.launch()