KleinPenny committed
Commit 9d03774 · verified · 1 Parent(s): 42a5345

Update app.py

Files changed (1):
  app.py +104 -56
app.py CHANGED
@@ -2,14 +2,14 @@ import gradio as gr
 import numpy as np
 from huggingface_hub import InferenceClient
 import os
+import requests
+import scipy.io.wavfile
 
-# Get the HuggingFace API token
 client = InferenceClient(
-    "microsoft/Phi-3-mini-4k-instruct",
-    token=os.getenv('hf_token'),
+    "meta-llama/Meta-Llama-3-8B-Instruct",
+    token=os.getenv('hf_token')
 )
 
-# Define the respond function, used to generate chat replies
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -42,76 +42,124 @@ def respond(
         response += token
        yield response
 
-# Audio processing function
-def process_audio(audio):
-    if audio is None:
-        return "No audio provided", None
-
-    # Save the audio as a .wav file
-    audio_path = "uploaded_audio.wav"
-    audio.save(audio_path)
-
-    return 'True'
-
-# Chat logic: update the chat history
-def chat_with_model(user_message, history):
-    system_message = "You are chatting with a helpful assistant."
-    max_tokens = 200
-    temperature = 0.7
-    top_p = 0.9
-
-    # Call respond to generate a reply
-    bot_response = next(respond(user_message, history, system_message, max_tokens, temperature, top_p))
-
-    # Append the user message and the model reply to the history
-    history.append((user_message, bot_response))
-    return history, history
-
-# Build the Gradio interface
+def process_audio(audio_data):
+    if audio_data is None:
+        return "No audio provided"
+
+    print("audio_data:", audio_data)  # added this line
+
+    # Check whether audio_data is a tuple and unpack it
+    if isinstance(audio_data, tuple):
+        sample_rate, data = audio_data
+        print("Sample rate:", sample_rate)
+        print("Data type:", type(data))
+    else:
+        return "Invalid audio data format"
+
+    # Define the local file path to save the WAV file
+    local_wav_file = "converted_audio.wav"
+
+    # Save the audio data as a WAV file
+    scipy.io.wavfile.write(local_wav_file, sample_rate, data)
+
+    API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
+    headers = {"Authorization": f"Bearer {os.getenv('hf_token')}"}
+
+    def query(filename):
+        with open(filename, "rb") as f:
+            file_data = f.read()
+        response = requests.post(API_URL, headers=headers, data=file_data)
+        return response.json()
+
+    # Call the API to process the audio
+    output = query(local_wav_file)
+
+    print(output)
+
+    # Check the API response
+    if 'text' in output:
+        return output['text']
+    else:
+        return "Error in processing audio"
+
+# Disable the button and show the loading indicator
+def disable_components():
+    # Update recognized_text to tell the user that processing is in progress
+    recognized_text_update = gr.update(value='Processing, please wait...')
+    # Disable process_button
+    process_button_update = gr.update(interactive=False)
+    # Show the loading animation
+    loading_animation_update = gr.update(visible=True)
+    return recognized_text_update, process_button_update, loading_animation_update
+
+# Re-enable the button and hide the loading indicator
+def enable_components(recognized_text):
+    # recognized_text has already been updated by process_audio at this point
+    # Re-enable process_button
+    process_button_update = gr.update(interactive=True)
+    # Hide the loading animation
+    loading_animation_update = gr.update(visible=False)
+    return recognized_text, process_button_update, loading_animation_update
+
+
+# Build the interface
 def create_interface():
     with gr.Blocks() as demo:
         # Title
-        gr.Markdown("# Speech Recognition and Generation System")
-
-        # Input section: audio upload
+        gr.Markdown("# Speech Recognition and Chat System")
+
+        # Audio input section
         with gr.Row():
             audio_input = gr.Audio(
-                sources=["microphone"],
-                waveform_options=gr.WaveformOptions(
-                    waveform_color="#01C6FF",
-                    waveform_progress_color="#0066B4",
-                    skip_length=2,
-                    show_controls=False,
-                ),
-                label="Audio generation"
+                sources="microphone",
+                type="numpy",  # get the audio data and the sample rate
+                label="Upload audio"
             )
 
-        # Output section: recognized text and audio playback
+        # Recognized-text output section
         with gr.Row():
             recognized_text = gr.Textbox(label="Recognized text")
-            audio_output = gr.Audio(label="Generated speech")
-
-        # Processing button
+
+        # Button to process the audio
         process_button = gr.Button("Process audio")
-
-        # Wire up the processing logic
-        process_button.click(process_audio, inputs=[audio_input], outputs=[recognized_text])
-
-        # Add the chat box
+
+        # Loading animation
+        loading_animation = gr.HTML(
+            value='<div style="text-align: center;"><span style="font-size: 18px;">ASR Model is running...</span></div>',
+            visible=False
+        )
+
+        # Hook up the audio processing function and update component state on click
+        process_button.click(
+            fn=disable_components,
+            inputs=[],
+            outputs=[recognized_text, process_button, loading_animation]
+        ).then(
+            fn=process_audio,
+            inputs=[audio_input],
+            outputs=recognized_text
+        ).then(
+            fn=enable_components,
+            inputs=[recognized_text],
+            outputs=[recognized_text, process_button, loading_animation]
+        )
+
+        # Chatbot interface
+        chatbot = gr.ChatInterface(
+            fn=respond,
+            additional_inputs=[
+                gr.Textbox(value="You are a helpful chatbot that answers questions.", label="System message")
+            ]
+        )
+
+        # Layout containing the Chatbot
         with gr.Row():
-            chatbot = gr.Chatbot(label="Chatbot")  # area that shows the chat history
-            user_input = gr.Textbox(placeholder="Type a message...", label="Message input")  # user input area
-            send_button = gr.Button("Send")  # button to send the message
-
-        # Chat history
-        chat_history = gr.State([])  # stores the chat history
-
-        # On send, call chat_with_model and update the chat record
-        send_button.click(chat_with_model, inputs=[user_input, chat_history], outputs=[chatbot, chat_history])
-
+            chatbot_output = chatbot
+
     return demo
 
 
+
 if __name__ == "__main__":
     demo = create_interface()
     demo.launch()
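
The hunks above only show the streaming tail of respond() ("response += token" / "yield response"); the rest of the function is unchanged and therefore elided by the diff. For reference, a minimal sketch of what the full helper typically looks like in this kind of Space, assuming the stock Gradio chat template and the huggingface_hub InferenceClient.chat_completion streaming API; the exact body in this repository may differ.

# Sketch only: the standard chat-template respond() that the visible context
# lines come from. Assumes `client` is the InferenceClient created at the top
# of app.py; parameter names follow the template, not necessarily this commit.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Build the conversation: system prompt, prior turns, then the new user message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    # Stream chunks from the model and yield the partial reply as it grows.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response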