Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import requests
|
3 |
+
|
4 |
+
from datetime import datetime
|
5 |
+
|
6 |
+
import time
|
7 |
+
import traceback
|
8 |
+
|
9 |
+
API_URL = "https://api-inference.huggingface.co/models/"
|
10 |
+
|
11 |
+
|
12 |
+
def date_now():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
14 |
+
|
15 |
+
def record_opt(msg):
    """Format ``msg`` as one history line: a timestamp, a space, the message, a newline.

    Args:
        msg: Text describing the operation that just happened.

    Returns:
        The timestamp-prefixed line, terminated by ``"\\n"`` so that
        successive entries can simply be concatenated.
    """
    return f"{date_now()} {msg}\n"
|
17 |
+
|
18 |
+
|
19 |
+
def _transcribe(audio, model_name, hf_token):
    """Upload the audio file at path ``audio`` to the model endpoint and return its text.

    Raises:
        ValueError: if no audio path was supplied.
        RuntimeError: if the decoded response carries no ``"text"`` field.
        Exception: anything raised while reading the file, performing the
            HTTP request, or decoding the response propagates to the caller.
    """
    # Socket timeout (seconds) so a stalled connection cannot block the
    # handler indefinitely.
    request_timeout = 120

    if not audio:
        raise ValueError("no audio file was provided")

    with open(audio, "rb") as f:
        data = f.read()

    url = API_URL + model_name
    print(f">>> url is {url}")

    # Skip the Authorization header when no token was entered rather than
    # sending an empty bearer credential.
    token = (hf_token or "").strip()
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    response = requests.post(url, headers=headers, data=data, timeout=request_timeout)
    payload = json.loads(response.content.decode("utf-8"))
    print(f">>> text is {payload}")

    # Report the whole payload instead of failing with a bare KeyError when
    # the response does not contain a transcript.
    if not isinstance(payload, dict) or "text" not in payload:
        raise RuntimeError(
            f"unexpected API response (HTTP {response.status_code}): {payload}"
        )
    return payload["text"]


def speech_recognize(audio, model_name, hf_token, opt):
    """Transcribe a recorded audio file and stream progress to the UI.

    This is a generator: it first yields a "please wait" placeholder and
    then yields the final result, each time paired with the updated
    operation history.

    Args:
        audio: Path of the audio file to transcribe.
        model_name: Model identifier appended to ``API_URL``.
        hf_token: Token sent as a bearer credential; may be empty.
        opt: Operation-history text accumulated so far.

    Yields:
        ``(text, history)`` tuples: the text to display and the history
        with the new log lines appended. On failure ``text`` holds the
        failure notice followed by the formatted traceback.
    """
    opt = opt or ""
    opt += record_opt("转录开始 ...")
    yield "转录中,请稍等...", opt
    start = time.monotonic()

    try:
        text = _transcribe(audio, model_name, hf_token)
    except Exception:
        # Show the failure in the output box instead of crashing the handler;
        # KeyboardInterrupt/SystemExit are deliberately not intercepted.
        text = f"转录失败:\n{traceback.format_exc()}"

    cost = time.monotonic() - start
    opt += record_opt(f"转录结束,耗时{cost:.3f}s")
    yield text, opt
|
40 |
+
|
41 |
+
import gradio as gr
|
42 |
+
|
43 |
+
def _log_event(message):
    """Return a handler that appends a timestamped ``message`` to the history text."""
    return lambda x: x + record_opt(message)


# Page layout: inputs (recorder, model choice, token) in the left column,
# transcription result and operation history in the right column.
with gr.Blocks() as demo:
    gr.HTML("""<h2 align="center">Automatic Speech Recognition (OpenAI Whisper with Inference API)</h2>""")
    with gr.Row():
        gr.Markdown(
            "🤗 调用 huggingface API,使用 OpenAI Whisper 模型进行语音识别,也可以称为语音转文本(Speech to Text, STT)\n"
            "\n"
            "👉 目的是练习使用 Gradio Audio 组件和探索使用 Huggingface Inference API\n"
        )
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(source="microphone", type="filepath")
            model_name = gr.Dropdown(
                label="选择模型",
                choices=[
                    "openai/whisper-large-v2",
                    "openai/whisper-large",
                    "openai/whisper-medium",
                    "openai/whisper-small",
                    "openai/whisper-base",
                    "openai/whisper-tiny",
                ],
                value="openai/whisper-large-v2",
            )
            hf_token = gr.Textbox(label="Huggingface token")
        with gr.Column():
            output = gr.Textbox(label="转录结果")
            operation = gr.Textbox(label="组件操作历史")

    # Each of these audio events only appends one line to the history box.
    for event, message in (
        (audio.start_recording, "开始录音 ..."),
        (audio.play, "播放录音"),
        (audio.pause, "暂停播放"),
        (audio.stop, "停止播放"),
        (audio.end, "播放完毕"),
    ):
        event(_log_event(message), inputs=operation, outputs=operation)

    # Finishing a recording starts the transcription.
    audio.stop_recording(
        speech_recognize,
        inputs=[audio, model_name, hf_token, operation],
        outputs=[output, operation],
    )

demo.queue(max_size=128, concurrency_count=16)
demo.launch(debug=True)
|
95 |
+
|
96 |
+
|