File size: 1,262 Bytes
8593183
e320838
8593183
 
 
 
 
 
 
 
 
 
 
a606014
9e20fea
a606014
9e20fea
8593183
a606014
 
 
8593183
 
97970d4
8593183
 
97970d4
8593183
ef69a46
8593183
 
 
 
 
ef69a46
a606014
 
 
 
 
8593183
 
a606014
8593183
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import importlib
import os
import subprocess
import sys

# Bootstrap NeMo before anything else is imported: install it only when it is
# actually missing instead of shelling out to pip on every start-up.
try:
    import nemo.collections.asr as nemo_asr
except ImportError:
    # Use the running interpreter's pip (not whatever `pip` is on PATH) and an
    # argument list rather than a shell string; check_call raises if the
    # install fails so the real cause is visible instead of a later
    # ModuleNotFoundError.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "nemo_toolkit[all]"]
    )
    # Make the freshly installed package visible to the import system.
    importlib.invalidate_caches()
    import nemo.collections.asr as nemo_asr

# Imported after the optional install step, matching the original ordering
# (install first, then load the UI library).
import gradio as gr


# Speech-recognition model shared by both transcription handlers below; it is
# created once at import time rather than per request.
model = nemo_asr.models.EncDecCTCModel.from_pretrained(
    model_name="stt_en_quartznet15x5"
)


def speech_file(x):
    """Transcribe an uploaded audio file and return the recognised text.

    Args:
        x: Path to the audio file, as delivered by the ``gr.Audio`` component
            configured with ``type="filepath"``. Falsy (``None`` or ``""``)
            when "Transcribe" was clicked without any audio.

    Returns:
        The transcription as a plain string, or ``""`` when no audio was
        supplied or the model returned nothing.
    """
    if not x:
        # Without this guard the literal path "None" was handed to the model.
        return ""

    transcripts = model.transcribe([str(x)])
    if not transcripts:
        return ""

    # transcribe() is called with a one-element batch; unwrap that single
    # result so the Textbox shows the text itself rather than "['...']".
    first = transcripts[0]
    # NOTE(review): some NeMo releases appear to return hypothesis objects
    # exposing `.text` instead of plain strings — confirm against the
    # installed version.
    return str(getattr(first, "text", first))

def speech_record(x):
    """Transcribe a microphone recording and return the recognised text.

    Args:
        x: Path to the recorded audio file, as delivered by the ``gr.Audio``
            component configured with ``type="filepath"``. Falsy (``None`` or
            ``""``) when "Transcribe" was clicked before recording anything.

    Returns:
        The transcription as a plain string, or ``""`` when nothing was
        recorded or the model returned nothing.
    """
    if not x:
        # Without this guard the literal path "None" was handed to the model.
        return ""

    transcripts = model.transcribe([str(x)])
    if not transcripts:
        return ""

    # transcribe() is called with a one-element batch; unwrap that single
    # result so the Textbox shows the text itself rather than "['...']".
    first = transcripts[0]
    # NOTE(review): some NeMo releases appear to return hypothesis objects
    # exposing `.text` instead of plain strings — confirm against the
    # installed version.
    return str(getattr(first, "text", first))


# Build the two-tab web UI: one tab transcribes an uploaded audio file, the
# other transcribes a microphone recording. Each tab has its own audio input,
# output textbox and "Transcribe" button.
# NOTE(review): `Row().style(...)` and `Audio(source=...)` look like the
# Gradio 3.x spellings — confirm the installed Gradio version before upgrading.
with gr.Blocks() as demo:
    # Page heading.
    gr.Markdown(
    """
    ## Speech to Text - NVIDIA Quartznet15x5 (English)
    """)

    # Tab 1: transcription of an audio file supplied by the user.
    with gr.Tab("Audio File"):
        with gr.Row().style(equal_height=True):
            audio_input2 = gr.Audio(label="Audio File", type="filepath")
            text_output2 = gr.Textbox(label="Transcription", show_label=False)
        file_button = gr.Button("Transcribe")

    # Tab 2: transcription of audio captured from the microphone.
    with gr.Tab("Record"):
        with gr.Row().style(equal_height=True):
            audio_input3 = gr.Audio(label="Input Audio", source="microphone", type="filepath")
            text_output3 = gr.Textbox(label="Transcription", show_label=False)
        rec_button = gr.Button("Transcribe")

    # Wire each button to its handler: the audio component's file path goes
    # in, the handler's return value is written to the matching textbox.
    file_button.click(speech_file, inputs=audio_input2, outputs=text_output2)
    rec_button.click(speech_record, inputs=audio_input3, outputs=text_output3)

# Start the Gradio server for the interface defined above.
demo.launch()