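"""Gradio demo pairing speech recognition (openai/whisper-large-v2 via the
Hugging Face Inference API) with a streaming Meta-Llama-3-8B-Instruct chatbot."""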
import gradio as gr
import numpy as np
from huggingface_hub import InferenceClient
import os
import requests
import scipy.io.wavfile

# Chat model served through the Hugging Face Inference API; the access token
# is read from the `hf_token` environment variable.
client = InferenceClient(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    token=os.getenv('hf_token')
)

# Streaming chat handler for gr.ChatInterface: rebuilds the model's message
# list from the conversation history, then yields the growing reply.
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": system_message}]

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # Stream the completion so the UI can render tokens as they arrive.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content

        # The final streamed chunk can carry an empty/None delta; skip it.
        if token:
            response += token
            yield response

# Transcribe recorded audio: write the numpy samples to a WAV file, then send
# it to the hosted Whisper model over the Inference API.
def process_audio(audio_data):
    if audio_data is None:
        return "No audio provided"
    
    print("audio_data:", audio_data)  # 添加这行代码

    # 检查 audio_data 是否是元组,并提取数据
    if isinstance(audio_data, tuple):
        sample_rate, data = audio_data
        print("Sample rate:", sample_rate)
        print("Data type:", type(data))
    else:
        return "Invalid audio data format"

    # Define the local file path to save the WAV file
    local_wav_file = "converted_audio.wav"

    # Save the audio data as a WAV file
    scipy.io.wavfile.write(local_wav_file, sample_rate, data)
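    # (gr.Audio with type="numpy" typically yields int16 samples, which
    # scipy.io.wavfile.write can serialize directly.)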
    
    API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
    headers = {"Authorization": f"Bearer {os.getenv('hf_token')}"}

    def query(filename):
        with open(filename, "rb") as f:
            file_data = f.read()
        response = requests.post(API_URL, headers=headers, data=file_data)
        return response.json()

    # Call the API to process the audio
    output = query(local_wav_file)

    print(output)
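    # While the hosted model is cold-starting, the Inference API may return an
    # {"error": ...} payload instead of {"text": ...}.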

    # Check the API response
    if 'text' in output:
        return output['text']
    else:
        return "Error in processing audio"

# Disable the button and show the loading indicator while the ASR model runs.
def disable_components():
    # Update recognized_text to tell the user that processing has started.
    recognized_text_update = gr.update(value='Processing, please wait...')
    # Disable process_button so the request cannot be re-triggered.
    process_button_update = gr.update(interactive=False)
    # Show the loading animation.
    loading_animation_update = gr.update(visible=True)
    return recognized_text_update, process_button_update, loading_animation_update

# Re-enable the button and hide the loading indicator once processing is done.
def enable_components(recognized_text):
    # recognized_text has already been filled in by process_audio; pass it through.
    process_button_update = gr.update(interactive=True)
    # Hide the loading animation.
    loading_animation_update = gr.update(visible=False)
    return recognized_text, process_button_update, loading_animation_update
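
# The two helpers above are chained around process_audio via .click().then()
# in create_interface() below, so the controls lock while the ASR request is
# in flight and unlock when it finishes.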


# Build the Gradio UI.
def create_interface():
    with gr.Blocks() as demo:
        # Title
        gr.Markdown("# Speech Recognition and Chat System")

        # Audio input section
        with gr.Row():
            audio_input = gr.Audio(
                sources=["microphone"],
                type="numpy",  # deliver (sample_rate, data) to the handler
                label="Record audio"
            )

        # Recognized-text output section
        with gr.Row():
            recognized_text = gr.Textbox(label="Recognized text")

        # Button that triggers transcription
        process_button = gr.Button("Process audio")

        # Loading indicator
        loading_animation = gr.HTML(
            value='<div style="text-align: center;"><span style="font-size: 18px;">ASR Model is running...</span></div>',
            visible=False
        )
        
        # On click: disable the controls, run ASR, then re-enable them.
        process_button.click(
            fn=disable_components,
            inputs=[],
            outputs=[recognized_text, process_button, loading_animation]
        ).then(
            fn=process_audio,
            inputs=[audio_input],
            outputs=recognized_text
        ).then(
            fn=enable_components,
            inputs=[recognized_text],
            outputs=[recognized_text, process_button, loading_animation]
        )
        
        # Chatbot interface backed by the streaming respond() generator.
        # respond() also expects max_tokens, temperature and top_p, so expose
        # them as sliders (the default values below are illustrative).
        chatbot = gr.ChatInterface(
            fn=respond,
            additional_inputs=[
                gr.Textbox(value="You are a helpful chatbot that answers questions.", label="System message"),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
            ]
        )
                
    return demo



if __name__ == "__main__":
    demo = create_interface()
    demo.launch()
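
# To run locally (a sketch; assumes this file is saved as app.py and a valid
# Hugging Face access token is available):
#   export hf_token=<your token>
#   python app.py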