Exodia

Sleeping

App Files Files Community

KleinPenny committed on Sep 14, 2024

Commit

68f0d8d

verified ·

1 Parent(s): 867343a

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -71

app.py CHANGED Viewed

@@ -5,65 +5,30 @@ import os
 import requests
 import scipy.io.wavfile
 import io
 client = InferenceClient(
     "meta-llama/Meta-Llama-3-8B-Instruct",
     token=os.getenv('hf_token')
 )
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
 def process_audio(audio_data):
     if audio_data is None:
-        return "No audio provided"
-    print("audio_data:", audio_data)  # 添加这行代码
     # 检查 audio_data 是否是元组，并提取数据
     if isinstance(audio_data, tuple):
         sample_rate, data = audio_data
-        print("Sample rate:", sample_rate)
-        print("Data type:", type(data))
     else:
-        return "Invalid audio data format"
     # Convert the audio data to WAV format in memory
     buf = io.BytesIO()
     scipy.io.wavfile.write(buf, sample_rate, data)
     wav_bytes = buf.getvalue()
     buf.close()
     API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
     headers = {"Authorization": f"Bearer {os.getenv('hf_token')}"}
@@ -74,13 +39,15 @@ def process_audio(audio_data):
     # Call the API to process the audio
     output = query(wav_bytes)
-    print(output)
     # Check the API response
     if 'text' in output:
-        return output['text']
     else:
-        return "Error in processing audio"
 # 定义函数以禁用按钮并显示加载指示器
 def disable_components():
@@ -94,42 +61,101 @@ def disable_components():
 # 定义函数以启用按钮并隐藏加载指示器
 def enable_components(recognized_text):
-    # 处理完成后，recognized_text 已经由 process_audio 更新
-    # 重新启用 process_button
     process_button_update = gr.update(interactive=True)
     # 隐藏加载动画
     loading_animation_update = gr.update(visible=False)
     return recognized_text, process_button_update, loading_animation_update
-# 创建界面
 def create_interface():
     with gr.Blocks() as demo:
-        # 标题
-        gr.Markdown("# 语音识别与聊天系统")
-        # 音频输入部分
         with gr.Row():
             audio_input = gr.Audio(
                 sources="microphone",
-                type="numpy",  # 获取音频数据和采样率
-                label="上传音频"
             )
-        # 文本识别输出部分
-        with gr.Row():
-            recognized_text = gr.Textbox(label="识别文本")
-        # 处理音频的按钮
-        process_button = gr.Button("处理音频")
-        # 加载动画
         loading_animation = gr.HTML(
             value='<div style="text-align: center;"><span style="font-size: 18px;">ASR Model is running...</span></div>',
             visible=False
         )
-        # 关联音频处理函数，并在点击时更新组件状态
         process_button.click(
             fn=disable_components,
             inputs=[],
@@ -137,22 +163,14 @@ def create_interface():
         ).then(
             fn=process_audio,
             inputs=[audio_input],
-            outputs=recognized_text
         ).then(
             fn=enable_components,
             inputs=[recognized_text],
             outputs=[recognized_text, process_button, loading_animation]
         )
-        # Chatbot 界面
-        chatbot = gr.ChatInterface(
-            fn=respond,
-            additional_inputs=[
-                gr.Textbox(value="You are a helpful chatbot that answers questions.", label="系统消息")
-            ]
-        )
-        # 布局包含 Chatbot
         with gr.Row():
             chatbot_output = chatbot
@@ -162,4 +180,4 @@ def create_interface():
 if __name__ == "__main__":
     demo = create_interface()
-    demo.launch()

 import requests
 import scipy.io.wavfile
 import io
+import time
 # Shared Hugging Face inference client for the chat model; the API token is
 # read from the `hf_token` environment variable.
 client = InferenceClient(
     "meta-llama/Meta-Llama-3-8B-Instruct",
     token=os.getenv('hf_token')
 )
 def process_audio(audio_data):
     # Convert recorded audio to WAV bytes, pass them to query(), and return a
     # pair of strings: the first is the recognized text or a status message,
     # the second is the recognized text again on success and "" otherwise.
     # NOTE(review): query() is not defined in the visible part of this
     # listing — presumably it POSTs the bytes to API_URL using `headers`;
     # confirm against the full file.
     if audio_data is None:
+        return "No audio provided.", ""
     # Check that audio_data is a tuple and unpack its contents
     if isinstance(audio_data, tuple):
         sample_rate, data = audio_data
     else:
+        return "Invalid audio data format.", ""
     # Convert the audio data to WAV format in memory
     buf = io.BytesIO()
     scipy.io.wavfile.write(buf, sample_rate, data)
     wav_bytes = buf.getvalue()
     buf.close()
     API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
     headers = {"Authorization": f"Bearer {os.getenv('hf_token')}"}
     # Call the API to process the audio
     output = query(wav_bytes)
+    print(output) # Check output in console (logs in HF space)
     # Check the API response
     # Any response without a 'text' key is reported to the user as the model
     # still loading.
     if 'text' in output:
+        recognized_text = output['text']
+        return recognized_text, recognized_text
     else:
+        recognized_text = "The ASR module is still loading, please press the button again!"
+        return recognized_text, ""
 # Define a function that disables the button and shows the loading indicator
 def disable_components():
 # Define a function that re-enables the button and hides the loading indicator
 def enable_components(recognized_text):
     """Pass the recognized text through and restore the idle UI state.

     Returns the text unchanged, an update that makes the process button
     interactive again, and an update that hides the loading animation.
     """
     return (
         recognized_text,
         gr.update(interactive=True),  # process button
         gr.update(visible=False),  # loading animation
     )
+llama_responded = 0
+def respond(
+    message,
+    history: list[tuple[str, str]]
+):
+    global llama_responded
+    system_message = "You are a helpful chatbot that answers questions. Give any answer within 50 words."
+    messages = [{"role": "system", "content": system_message}]
+    for val in history:
+        print(val[0])
+        if val[0] != None:
+            if val[0]:
+                messages.append({"role": "user", "content": val[0]})
+            if val[1]:
+                messages.append({"role": "assistant", "content": val[1]})
+    messages.append({"role": "user", "content": message})
+    response = ""
+    for message in client.chat_completion(
+        messages,
+        stream=True,
+    ):
+        token = message.choices[0].delta.content
+        response += token
+    llama_responded = 1
+    return response #gr.Audio("/home/yxpeng/Projects/RAGHack/Exodia/voice_sample/trump1.wav")
+def update_response_display():
+    while not llama_responded:
+        time.sleep(1)
+def bot(history):
+    global llama_responded
+    #print(history)
+    history.append([None,gr.Audio("/home/yxpeng/Projects/RAGHack/Exodia/voice_sample/trump1.wav")])
+    llama_responded = 0
+    return history
 def create_interface():
     # Build the Gradio Blocks UI (microphone input, recognized-text box,
     # process button, loading banner, chat interface) and wire the events
     # between them.
     # NOTE(review): parts of this function appear to be elided in this
     # listing (no return statement is visible, although the caller uses the
     # result) — confirm against the full file.
     with gr.Blocks() as demo:
+        # Title
+        gr.Markdown("# Exodia AI Assistant")
+        # Audio input section
         with gr.Row():
             audio_input = gr.Audio(
                 sources="microphone",
+                type="numpy",  # Get audio data and sample rate
+                label="Say Something..."
             )
+            recognized_text = gr.Textbox(label="Recognized Text",interactive=False)
+        # Process audio button
+        process_button = gr.Button("Process Audio")
+        # Loading animation
         loading_animation = gr.HTML(
             value='<div style="text-align: center;"><span style="font-size: 18px;">ASR Model is running...</span></div>',
             visible=False
         )
+        chatbot_custom = gr.Chatbot(height=500)  # Set height to 500 pixels
+        # Chat interface using the custom chatbot instance
+        chatbot = gr.ChatInterface(
+            fn=respond,
+            chatbot=chatbot_custom,
+            submit_btn="Start Chatting"
+        )
+        # On textbox submit, first wait in update_response_display
+        user_start =chatbot.textbox.submit(
+            fn=update_response_display,
+            inputs=[],
+            outputs=[],
+        )
+        # When the user submits a request
+        #user_start = chatbot.textbox.submit()
+        user_start.then(
+            fn=bot,
+            inputs=[chatbot_custom],
+            outputs=chatbot_custom,  # update the content of the response display
+        )
+        # Associate audio processing function and update component states on click
         process_button.click(
             fn=disable_components,
             inputs=[],
         ).then(
             fn=process_audio,
             inputs=[audio_input],
+            outputs=[recognized_text, chatbot.textbox]
         ).then(
             fn=enable_components,
             inputs=[recognized_text],
             outputs=[recognized_text, process_button, loading_animation]
         )
+        # Layout includes Chatbot
         with gr.Row():
             chatbot_output = chatbot
 # Build the interface and launch it when the file is run as a script.
 if __name__ == "__main__":
     demo = create_interface()
+    demo.launch()