Spaces:

hashb
/

stt-quartznet15x5-en-nvidia

Runtime error

App Files Files Community

Harshad Bhandwaldar committed on Jan 9, 2023

Commit

97970d4

1 Parent(s): ef69a46

model added

Browse files

Files changed (1) hide show

app.py +2 -91

app.py CHANGED Viewed

@@ -20,94 +20,11 @@ def speech_record(x):
     text = model.transcribe([f"{x}"])
     return text
-css = """
-        .gradio-container {
-            font-family: 'IBM Plex Sans', sans-serif;
-        }
-        .gr-button {
-            color: white;
-            border-color: black;
-            background: black;
-        }
-        input[type='range'] {
-            accent-color: black;
-        }
-        .dark input[type='range'] {
-            accent-color: #dfdfdf;
-        }
-        .container {
-            max-width: 730px;
-            margin: auto;
-            padding-top: 1.5rem;
-        }
-        .details:hover {
-            text-decoration: underline;
-        }
-        .gr-button {
-            white-space: nowrap;
-        }
-        .gr-button:focus {
-            border-color: rgb(147 197 253 / var(--tw-border-opacity));
-            outline: none;
-            box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
-            --tw-border-opacity: 1;
-            --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
-            --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px var(--tw-ring-offset-width)) var(--tw-ring-color);
-            --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
-            --tw-ring-opacity: .5;
-        }
-        .footer {
-            margin-bottom: 45px;
-            margin-top: 35px;
-            text-align: center;
-            border-bottom: 1px solid #e5e5e5;
-        }
-        .footer>p {
-            font-size: .8rem;
-            display: inline-block;
-            padding: 0 10px;
-            transform: translateY(10px);
-            background: white;
-        }
-        .dark .footer {
-            border-color: #303030;
-        }
-        .dark .footer>p {
-            background: #0b0f19;
-        }
-        .prompt h4{
-            margin: 1.25em 0 .25em 0;
-            font-weight: bold;
-            font-size: 115%;
-        }
-        .animate-spin {
-            animation: spin 1s linear infinite;
-        }
-        @keyframes spin {
-            from {
-                transform: rotate(0deg);
-            }
-            to {
-                transform: rotate(360deg);
-            }
-        }
-        #share-btn-container {
-            display: flex; margin-top: 1.5rem !important; padding-left: 0.5rem !important; padding-right: 0.5rem !important; background-color: #000000; justify-content: center; align-items: center; border-radius: 9999px !important; width: 13rem;
-        }
-        #share-btn {
-            all: initial; color: #ffffff;font-weight: 600; cursor:pointer; font-family: 'IBM Plex Sans', sans-serif; margin-left: 0.5rem !important; padding-top: 0.25rem !important; padding-bottom: 0.25rem !important;
-        }
-        #share-btn * {
-            all: unset;
-        }
-"""
-with gr.Blocks(css = css) as demo:
     gr.Markdown(
     """
-    # Speech to Text - NVIDIA Qaurtznet15x5 (English)
-    QuartzNet is a Jasper-like network that uses separable convolutions and larger filter sizes. It has comparable accuracy to Jasper while having much fewer parameters. This particular model has 15 blocks each repeated 5 times.
     """)
     with gr.Tab("Audio File"):
@@ -121,12 +38,6 @@ with gr.Blocks(css = css) as demo:
             audio_input3 = gr.Audio(label="Input Audio", source="microphone", type="filepath")
             text_output3 = gr.Textbox(label="Transcription", show_label=False)
         rec_button = gr.Button("Transcribe")
-    gr.HTML('''
-        <div class="footer">
-                    <p></a>
-                    </p>
-        </div>
-        ''')
     file_button.click(speech_file, inputs=audio_input2, outputs=text_output2)
     rec_button.click(speech_record, inputs=audio_input3, outputs=text_output3)

     text = model.transcribe([f"{x}"])
     return text
+with gr.Blocks() as demo:
     gr.Markdown(
     """
+    ## Speech to Text - NVIDIA Qaurtznet15x5 (English)
     """)
     with gr.Tab("Audio File"):
             audio_input3 = gr.Audio(label="Input Audio", source="microphone", type="filepath")
             text_output3 = gr.Textbox(label="Transcription", show_label=False)
         rec_button = gr.Button("Transcribe")
     file_button.click(speech_file, inputs=audio_input2, outputs=text_output2)
     rec_button.click(speech_record, inputs=audio_input3, outputs=text_output3)