Update app.py
app.py CHANGED
```diff
@@ -17,8 +17,8 @@ def record_opt(msg):
 
 
 def speech_recognize(audio, model_name, hf_token, opt):
-    opt += record_opt("
-    yield "
+    opt += record_opt("Transcription starts ...")
+    yield "Transcribing, please wait...", opt
     start = time.monotonic()
 
     with open(audio, "rb") as f:
@@ -32,10 +32,10 @@ def speech_recognize(audio, model_name, hf_token, opt):
         print(f">>> text is {text}")
         text = text['text']
     except:
-        text = f"
+        text = f"Transcription failed:\n{traceback.format_exc()}"
 
     cost = time.monotonic() - start
-    opt += record_opt(f"
+    opt += record_opt(f"Transcription ends, took {cost:.3f}s")
     yield text, opt
 
 import gradio as gr
@@ -44,18 +44,18 @@ with gr.Blocks() as demo:
     gr.HTML("""<h2 align="center">Automatic Speech Recognition (OpenAI Whisper with Inference API)</h2>""")
     with gr.Row():
         gr.Markdown(
-            """🤗
+            """🤗 Calls the Hugging Face Inference API with the OpenAI Whisper model for speech recognition, also known as speech-to-text (STT)
 
-            👉
+            👉 The purpose is to practice using the Gradio Audio component and to explore the Hugging Face Inference API
 
-            >
+            > 💡 Tip: you need to fill in a Hugging Face token to call the Inference API
             """
         )
     with gr.Row():
         with gr.Column():
             audio = gr.Audio(source="microphone", type="filepath")
             model_name = gr.Dropdown(
-                label="
+                label="Select model",
                 choices=[
                     "openai/whisper-large-v3",
                     "openai/whisper-large-v2",
@@ -69,26 +69,26 @@ with gr.Blocks() as demo:
             )
             hf_token = gr.Textbox(label="Huggingface token")
         with gr.Column():
-            output = gr.Textbox(label="
-            operation = gr.Textbox(label="
+            output = gr.Textbox(label="Transcription results")
+            operation = gr.Textbox(label="Component operation history")
     audio.start_recording(
-        lambda x: x + record_opt("
+        lambda x: x + record_opt("Start recording ..."),
         inputs=operation, outputs=operation
     )
     audio.play(
-        lambda x: x + record_opt("
+        lambda x: x + record_opt("Play recording"),
         inputs=operation, outputs=operation
     )
     audio.pause(
-        lambda x: x + record_opt("
+        lambda x: x + record_opt("Pause playback"),
         inputs=operation, outputs=operation
    )
     audio.stop(
-        lambda x: x + record_opt("
+        lambda x: x + record_opt("Stop playback"),
         inputs=operation, outputs=operation
     )
     audio.end(
-        lambda x: x + record_opt("
+        lambda x: x + record_opt("Playback finished"),
         inputs=operation, outputs=operation
     )
     audio.stop_recording(speech_recognize, inputs=[audio, model_name, hf_token, operation], outputs=[output, operation])
```
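The hunk markers hide the body of the API request (lines 24–31), so the diff never shows how the audio actually reaches the model. Pieced together from the visible context lines, the updated `speech_recognize` reads roughly as below; the `requests.post` call and the `api-inference.huggingface.co` endpoint are assumptions about the hidden lines, not code shown in this commit:

```python
import time
import traceback

import requests

API_URL = "https://api-inference.huggingface.co/models/"


def speech_recognize(audio, model_name, hf_token, opt):
    # record_opt is defined earlier in app.py (see the sketch below).
    opt += record_opt("Transcription starts ...")
    yield "Transcribing, please wait...", opt
    start = time.monotonic()

    with open(audio, "rb") as f:
        data = f.read()
    try:
        # Assumed: POST the raw audio bytes to the Inference API, which
        # responds with JSON of the form {"text": "..."}.
        resp = requests.post(
            API_URL + model_name,
            headers={"Authorization": f"Bearer {hf_token}"},
            data=data,
        )
        text = resp.json()
        print(f">>> text is {text}")
        text = text['text']
    except Exception:
        text = f"Transcription failed:\n{traceback.format_exc()}"

    cost = time.monotonic() - start
    opt += record_opt(f"Transcription ends, took {cost:.3f}s")
    yield text, opt
```

Because `speech_recognize` is a generator, Gradio shows the first `yield` ("Transcribing, please wait...") in the output textbox immediately, then replaces it when the final transcription is yielded.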
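`record_opt` itself sits above the first hunk and never appears in the diff. Judging from how it is used (its return value is concatenated onto the operation-history text), it presumably produces a timestamped, newline-terminated entry; a hypothetical sketch, not the actual helper from app.py:

```python
from datetime import datetime


def record_opt(msg):
    # Hypothetical: timestamp each operation and end with a newline so
    # entries accumulate line by line in the "Component operation history"
    # textbox.
    return f"{datetime.now():%H:%M:%S} {msg}\n"
```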