Update app.py
app.py CHANGED
@@ -2,14 +2,14 @@ import gradio as gr
 import numpy as np
 from huggingface_hub import InferenceClient
 import os
+import requests
+import scipy.io.wavfile

-# Get the HuggingFace API token
 client = InferenceClient(
-    "
-    token=os.getenv('hf_token')
+    "meta-llama/Meta-Llama-3-8B-Instruct",
+    token=os.getenv('hf_token')
 )

-# Define the respond function, used to generate chat replies
 def respond(
     message,
     history: list[tuple[str, str]],
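Note: the body of respond (lines 16-41) falls between these two hunks and is not shown. For orientation only, a streaming implementation consistent with the visible `response += token` / `yield response` lines and with the Meta-Llama-3-8B-Instruct client configured above might look like the sketch below; the signature defaults and the message-building logic are assumptions, not code from this commit.

def respond(
    message,
    history: list[tuple[str, str]],
    system_message="You are a helpful chatbot that answers questions.",
    max_tokens=512,
    temperature=0.7,
    top_p=0.9,
):
    # Hypothetical sketch, not the code in this commit.
    # Rebuild the conversation in the chat-completion message format.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream tokens and yield the growing reply so gr.ChatInterface updates live.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response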
@@ -42,76 +42,124 @@ def respond(
         response += token
         yield response

-
-
-
-        return "No audio provided", None
+def process_audio(audio_data):
+    if audio_data is None:
+        return "No audio provided"

-    #
-    audio_path = "uploaded_audio.wav"
-    audio.save(audio_path)
+    print("audio_data:", audio_data)  # added this line

-
+    # Check whether audio_data is a tuple and unpack it
+    if isinstance(audio_data, tuple):
+        sample_rate, data = audio_data
+        print("Sample rate:", sample_rate)
+        print("Data type:", type(data))
+    else:
+        return "Invalid audio data format"

-    #
-
-    system_message = "You are chatting with a helpful assistant."
-    max_tokens = 200
-    temperature = 0.7
-    top_p = 0.9
+    # Define the local file path to save the WAV file
+    local_wav_file = "converted_audio.wav"

-    #
-
-
-
-
-
-
-
+    # Save the audio data as a WAV file
+    scipy.io.wavfile.write(local_wav_file, sample_rate, data)
+
+    API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
+    headers = {"Authorization": f"Bearer {os.getenv('hf_token')}"}
+
+    def query(filename):
+        with open(filename, "rb") as f:
+            file_data = f.read()
+        response = requests.post(API_URL, headers=headers, data=file_data)
+        return response.json()
+
+    # Call the API to process the audio
+    output = query(local_wav_file)
+
+    print(output)
+
+    # Check the API response
+    if 'text' in output:
+        return output['text']
+    else:
+        return "Error in processing audio"
+
+# Disable the button and show the loading indicator
+def disable_components():
+    # Update recognized_text to tell the user that processing is under way
+    recognized_text_update = gr.update(value='正在处理,请稍候...')
+    # Disable process_button
+    process_button_update = gr.update(interactive=False)
+    # Show the loading animation
+    loading_animation_update = gr.update(visible=True)
+    return recognized_text_update, process_button_update, loading_animation_update
+
+# Re-enable the button and hide the loading indicator
+def enable_components(recognized_text):
+    # By the time this runs, recognized_text has already been updated by process_audio
+    # Re-enable process_button
+    process_button_update = gr.update(interactive=True)
+    # Hide the loading animation
+    loading_animation_update = gr.update(visible=False)
+    return recognized_text, process_button_update, loading_animation_update
+
+
+# Build the interface
 def create_interface():
     with gr.Blocks() as demo:
         # Title
-        gr.Markdown("#
-
-        #
+        gr.Markdown("# 语音识别与聊天系统")
+
+        # Audio input section
         with gr.Row():
             audio_input = gr.Audio(
-                sources=
-
-
-                    waveform_progress_color="#0066B4",
-                    skip_length=2,
-                    show_controls=False,
-                ),
-                label="音频生成"
+                sources="microphone",
+                type="numpy",  # get the audio data and the sampling rate
+                label="上传音频"
             )

-        #
+        # Recognized-text output section
         with gr.Row():
             recognized_text = gr.Textbox(label="识别文本")
-
-
-        # Processing button
+
+        # Button that triggers audio processing
         process_button = gr.Button("处理音频")
-
-        #
-
-
-
+
+        # Loading animation
+        loading_animation = gr.HTML(
+            value='<div style="text-align: center;"><span style="font-size: 18px;">ASR Model is running...</span></div>',
+            visible=False
+        )
+
+        # Hook up the audio-processing function and update component state on click
+        process_button.click(
+            fn=disable_components,
+            inputs=[],
+            outputs=[recognized_text, process_button, loading_animation]
+        ).then(
+            fn=process_audio,
+            inputs=[audio_input],
+            outputs=recognized_text
+        ).then(
+            fn=enable_components,
+            inputs=[recognized_text],
+            outputs=[recognized_text, process_button, loading_animation]
+        )
+
+        # Chatbot interface
+        chatbot = gr.ChatInterface(
+            fn=respond,
+            additional_inputs=[
+                gr.Textbox(value="You are a helpful chatbot that answers questions.", label="系统消息")
+            ]
+        )
+
+        # Layout row containing the Chatbot
         with gr.Row():
-
-
-            send_button = gr.Button("发送")  # button for sending a message
-
-            # Chat history
-            chat_history = gr.State([])  # used to store the chat history
-
-            # When the send button is clicked, call chat_with_model and update the chat history
-            send_button.click(chat_with_model, inputs=[user_input, chat_history], outputs=[chatbot, chat_history])
-
+            chatbot_output = chatbot
+
     return demo


+
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch()
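Note: process_audio above calls the serverless Inference API with a hand-rolled requests.post and then checks the JSON for a 'text' key. Since huggingface_hub is already imported, an equivalent transcription step could also go through InferenceClient, which handles the upload itself. A minimal sketch of that alternative follows; it is not part of this commit, the names asr_client and transcribe are made up, and the return type of automatic_speech_recognition differs between huggingface_hub versions (a plain string in older releases, an object with a .text field in newer ones).

import os
import scipy.io.wavfile
from huggingface_hub import InferenceClient

# Separate client pointed at the same Whisper checkpoint used above (assumption).
asr_client = InferenceClient("openai/whisper-large-v2", token=os.getenv("hf_token"))

def transcribe(audio_data):
    # audio_data is the (sample_rate, numpy_array) tuple produced by gr.Audio(type="numpy").
    if audio_data is None:
        return "No audio provided"
    sample_rate, data = audio_data
    wav_path = "converted_audio.wav"
    scipy.io.wavfile.write(wav_path, sample_rate, data)
    # automatic_speech_recognition accepts a local path or raw bytes; normalize the
    # result so both old (str) and new (.text) return types work.
    result = asr_client.automatic_speech_recognition(wav_path)
    return getattr(result, "text", result)

Wired into the Blocks above, transcribe would simply take the place of process_audio in the process_button.click(...).then(...) chain.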