KleinPenny committed
Commit 9d03774 · verified · 1 Parent(s): 42a5345

Update app.py

Files changed (1):
  app.py +104 -56
app.py CHANGED
@@ -2,14 +2,14 @@ import gradio as gr
 import numpy as np
 from huggingface_hub import InferenceClient
 import os
+import requests
+import scipy.io.wavfile
 
-# Get the HuggingFace API token
 client = InferenceClient(
-    "microsoft/Phi-3-mini-4k-instruct",
-    token=os.getenv('hf_token'),
+    "meta-llama/Meta-Llama-3-8B-Instruct",
+    token=os.getenv('hf_token')
 )
 
-# Define the respond function, used to generate chat replies
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -42,76 +42,124 @@ def respond(
         response += token
        yield response
 
-# Audio processing function
-def process_audio(audio):
-    if audio is None:
-        return "No audio provided", None
-
-    # Save the audio as a .wav file
-    audio_path = "uploaded_audio.wav"
-    audio.save(audio_path)
-
-    return 'True'
-
-# Chat logic: update the chat history
-def chat_with_model(user_message, history):
-    system_message = "You are chatting with a helpful assistant."
-    max_tokens = 200
-    temperature = 0.7
-    top_p = 0.9
-
-    # Call respond to generate a reply
-    bot_response = next(respond(user_message, history, system_message, max_tokens, temperature, top_p))
-
-    # Append the user message and the model reply to the history
-    history.append((user_message, bot_response))
-    return history, history
-
-# Build the Gradio interface
+def process_audio(audio_data):
+    if audio_data is None:
+        return "No audio provided"
+
+    print("audio_data:", audio_data)  # added this line
+
+    # Check whether audio_data is a tuple and unpack it
+    if isinstance(audio_data, tuple):
+        sample_rate, data = audio_data
+        print("Sample rate:", sample_rate)
+        print("Data type:", type(data))
+    else:
+        return "Invalid audio data format"
+
+    # Define the local file path to save the WAV file
+    local_wav_file = "converted_audio.wav"
+
+    # Save the audio data as a WAV file
+    scipy.io.wavfile.write(local_wav_file, sample_rate, data)
+
+    API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
+    headers = {"Authorization": f"Bearer {os.getenv('hf_token')}"}
+
+    def query(filename):
+        with open(filename, "rb") as f:
+            file_data = f.read()
+        response = requests.post(API_URL, headers=headers, data=file_data)
+        return response.json()
+
+    # Call the API to process the audio
+    output = query(local_wav_file)
+
+    print(output)
+
+    # Check the API response
+    if 'text' in output:
+        return output['text']
+    else:
+        return "Error in processing audio"
+
+# Disable the button and show the loading indicator
+def disable_components():
+    # Update recognized_text to tell the user that processing is in progress
+    recognized_text_update = gr.update(value='Processing, please wait...')
+    # Disable process_button
+    process_button_update = gr.update(interactive=False)
+    # Show the loading animation
+    loading_animation_update = gr.update(visible=True)
+    return recognized_text_update, process_button_update, loading_animation_update
+
+# Re-enable the button and hide the loading indicator
+def enable_components(recognized_text):
+    # recognized_text has already been updated by process_audio at this point
+    # Re-enable process_button
+    process_button_update = gr.update(interactive=True)
+    # Hide the loading animation
+    loading_animation_update = gr.update(visible=False)
+    return recognized_text, process_button_update, loading_animation_update
+
+
+# Build the interface
 def create_interface():
     with gr.Blocks() as demo:
         # Title
-        gr.Markdown("# Speech Recognition and Generation System")
-
-        # Input section: audio upload
+        gr.Markdown("# Speech Recognition and Chat System")
+
+        # Audio input section
         with gr.Row():
             audio_input = gr.Audio(
-                sources=["microphone"],
-                waveform_options=gr.WaveformOptions(
-                    waveform_color="#01C6FF",
-                    waveform_progress_color="#0066B4",
-                    skip_length=2,
-                    show_controls=False,
-                ),
-                label="Audio generation"
+                sources="microphone",
+                type="numpy",  # get the audio data and the sample rate
+                label="Upload audio"
             )
 
-        # Output section: recognized text and audio playback
+        # Recognized-text output section
         with gr.Row():
             recognized_text = gr.Textbox(label="Recognized text")
-            audio_output = gr.Audio(label="Generated speech")
-
-        # Processing button
+
+        # Button to process the audio
         process_button = gr.Button("Process audio")
-
-        # Wire up the processing logic
-        process_button.click(process_audio, inputs=[audio_input], outputs=[recognized_text])
-
-        # Add the chat box
+
+        # Loading animation
+        loading_animation = gr.HTML(
+            value='<div style="text-align: center;"><span style="font-size: 18px;">ASR Model is running...</span></div>',
+            visible=False
+        )
+
+        # Hook up the audio processing function and update component state on click
+        process_button.click(
+            fn=disable_components,
+            inputs=[],
+            outputs=[recognized_text, process_button, loading_animation]
+        ).then(
+            fn=process_audio,
+            inputs=[audio_input],
+            outputs=recognized_text
+        ).then(
+            fn=enable_components,
+            inputs=[recognized_text],
+            outputs=[recognized_text, process_button, loading_animation]
+        )
+
+        # Chatbot interface
+        chatbot = gr.ChatInterface(
+            fn=respond,
+            additional_inputs=[
+                gr.Textbox(value="You are a helpful chatbot that answers questions.", label="System message")
+            ]
+        )
+
+        # Layout containing the Chatbot
         with gr.Row():
-            chatbot = gr.Chatbot(label="Chatbot")  # area that shows the chat history
-            user_input = gr.Textbox(placeholder="Type a message...", label="Message input")  # user input area
-            send_button = gr.Button("Send")  # button to send the message
-
-        # Chat history
-        chat_history = gr.State([])  # stores the chat history
-
-        # On send, call chat_with_model and update the chat record
-        send_button.click(chat_with_model, inputs=[user_input, chat_history], outputs=[chatbot, chat_history])
-
+            chatbot_output = chatbot
+
     return demo
 
 
+
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch()
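
The hunks above only show the streaming tail of respond() ("response += token" / "yield response"); the rest of the function is unchanged and therefore elided by the diff. For reference, a minimal sketch of what the full helper typically looks like in this kind of Space, assuming the stock Gradio chat template and the huggingface_hub InferenceClient.chat_completion streaming API; the exact body in this repository may differ.

# Sketch only: the standard chat-template respond() that the visible context
# lines come from. Assumes `client` is the InferenceClient created at the top
# of app.py; parameter names follow the template, not necessarily this commit.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Build the conversation: system prompt, prior turns, then the new user message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream chunks from the model and yield the partial reply as it grows.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response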