Update app.py
app.py CHANGED
```diff
@@ -17,8 +17,8 @@ def record_opt(msg):
 
 
 def speech_recognize(audio, model_name, hf_token, opt):
-    opt += record_opt("
-    yield "
+    opt += record_opt("Transcription starts ...")
+    yield "Transcribing, please wait...", opt
     start = time.monotonic()
 
     with open(audio, "rb") as f:
@@ -32,10 +32,10 @@ def speech_recognize(audio, model_name, hf_token, opt):
         print(f">>> text is {text}")
         text = text['text']
     except:
-        text = f"
+        text = f"Transcription failed:\n{traceback.format_exc()}"
 
     cost = time.monotonic() - start
-    opt += record_opt(f"
+    opt += record_opt(f"Transcription ends, took {cost:.3f}s")
     yield text, opt
 
 import gradio as gr
@@ -44,18 +44,18 @@ with gr.Blocks() as demo:
     gr.HTML("""<h2 align="center">Automatic Speech Recognition (OpenAI Whisper with Inference API)</h2>""")
     with gr.Row():
         gr.Markdown(
-            """🤗
+            """🤗 Calls the Hugging Face Inference API with the OpenAI Whisper model for speech recognition, also known as speech-to-text (STT)
 
-            👉
+            👉 The purpose is to practice using the Gradio Audio component and to explore the Hugging Face Inference API
 
-            >
+            > 💡 Tip: you need to fill in a Hugging Face token to call the Inference API
             """
         )
     with gr.Row():
         with gr.Column():
             audio = gr.Audio(source="microphone", type="filepath")
             model_name = gr.Dropdown(
-                label="
+                label="Select model",
                 choices=[
                     "openai/whisper-large-v3",
                     "openai/whisper-large-v2",
@@ -69,26 +69,26 @@ with gr.Blocks() as demo:
             )
             hf_token = gr.Textbox(label="Huggingface token")
         with gr.Column():
-            output = gr.Textbox(label="
-            operation = gr.Textbox(label="
+            output = gr.Textbox(label="Transcription results")
+            operation = gr.Textbox(label="Component operation history")
     audio.start_recording(
-        lambda x: x + record_opt("
+        lambda x: x + record_opt("Start recording ..."),
         inputs=operation, outputs=operation
     )
     audio.play(
-        lambda x: x + record_opt("
+        lambda x: x + record_opt("Play recording"),
         inputs=operation, outputs=operation
     )
     audio.pause(
-        lambda x: x + record_opt("
+        lambda x: x + record_opt("Pause playback"),
         inputs=operation, outputs=operation
    )
     audio.stop(
-        lambda x: x + record_opt("
+        lambda x: x + record_opt("Stop playback"),
         inputs=operation, outputs=operation
     )
     audio.end(
-        lambda x: x + record_opt("
+        lambda x: x + record_opt("Playback finished"),
         inputs=operation, outputs=operation
     )
     audio.stop_recording(speech_recognize, inputs=[audio, model_name, hf_token, operation], outputs=[output, operation])
```
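The hunk markers hide the body of the API request (lines 24–31), so the diff never shows how the audio actually reaches the model. Pieced together from the visible context lines, the updated `speech_recognize` reads roughly as below; the `requests.post` call and the `api-inference.huggingface.co` endpoint are assumptions about the hidden lines, not code shown in this commit:

```python
import time
import traceback

import requests

API_URL = "https://api-inference.huggingface.co/models/"


def speech_recognize(audio, model_name, hf_token, opt):
    # record_opt is defined earlier in app.py (see the sketch below).
    opt += record_opt("Transcription starts ...")
    yield "Transcribing, please wait...", opt
    start = time.monotonic()

    with open(audio, "rb") as f:
        data = f.read()
    try:
        # Assumed: POST the raw audio bytes to the Inference API, which
        # responds with JSON of the form {"text": "..."}.
        resp = requests.post(
            API_URL + model_name,
            headers={"Authorization": f"Bearer {hf_token}"},
            data=data,
        )
        text = resp.json()
        print(f">>> text is {text}")
        text = text['text']
    except Exception:
        text = f"Transcription failed:\n{traceback.format_exc()}"

    cost = time.monotonic() - start
    opt += record_opt(f"Transcription ends, took {cost:.3f}s")
    yield text, opt
```

Because `speech_recognize` is a generator, Gradio shows the first `yield` ("Transcribing, please wait...") in the output textbox immediately, then replaces it when the final transcription is yielded.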
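`record_opt` itself sits above the first hunk and never appears in the diff. Judging from how it is used (its return value is concatenated onto the operation-history text), it presumably produces a timestamped, newline-terminated entry; a hypothetical sketch, not the actual helper from app.py:

```python
from datetime import datetime


def record_opt(msg):
    # Hypothetical: timestamp each operation and end with a newline so
    # entries accumulate line by line in the "Component operation history"
    # textbox.
    return f"{datetime.now():%H:%M:%S} {msg}\n"
```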