import gradio as gr
import numpy as np
from huggingface_hub import InferenceClient
import os
import requests
import scipy.io.wavfile
import io
import time

client = InferenceClient(
    "meta-llama/Meta-Llama-3-8B-Instruct",
    token=os.getenv('hf_token')
)


def process_audio(audio_data):
    if audio_data is None:
        return "No audio provided.", ""

    # gr.Audio with type="numpy" delivers a (sample_rate, data) tuple
    if isinstance(audio_data, tuple):
        sample_rate, data = audio_data
    else:
        return "Invalid audio data format.", ""

    # Convert the audio data to WAV format in memory
    buf = io.BytesIO()
    scipy.io.wavfile.write(buf, sample_rate, data)
    wav_bytes = buf.getvalue()
    buf.close()

    API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v2"
    headers = {"Authorization": f"Bearer {os.getenv('hf_token')}"}

    def query(wav_data):
        response = requests.post(API_URL, headers=headers, data=wav_data)
        return response.json()

    # Call the Inference API to transcribe the audio
    output = query(wav_bytes)
    print(output)  # Check output in console (logs in HF Space)

    # Check the API response
    if 'text' in output:
        recognized_text = output['text']
        return recognized_text, recognized_text
    else:
        recognized_text = "The ASR module is still loading, please press the button again!"
        return recognized_text, ""


# Disable the button and show the loading indicator
def disable_components():
    # Tell the user that processing is underway
    recognized_text_update = gr.update(value='Processing, please wait...')
    # Disable process_button
    process_button_update = gr.update(interactive=False)
    # Show the loading animation
    loading_animation_update = gr.update(visible=True)
    return recognized_text_update, process_button_update, loading_animation_update


# Re-enable the button and hide the loading indicator
def enable_components(recognized_text):
    process_button_update = gr.update(interactive=True)
    # Hide the loading animation
    loading_animation_update = gr.update(visible=False)
    return recognized_text, process_button_update, loading_animation_update


# Flag set by respond() once the LLM has finished; polled by update_response_display()
llama_responded = 0


def respond(message, history: list[tuple[str, str]]):
    global llama_responded
    system_message = (
        "You are a helpful chatbot that answers questions. "
        "Give any answer within 50 words."
    )
    messages = [{"role": "system", "content": system_message}]

    # Replay the chat history as alternating user/assistant turns
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(messages, stream=True):
        token = chunk.choices[0].delta.content
        if token:  # the final chunk of a stream may carry no content
            response += token

    llama_responded = 1
    return response


# Block until the LLM has responded (runs in the submit event chain)
def update_response_display():
    while not llama_responded:
        time.sleep(1)


def bot(history):
    global llama_responded
    # Append a canned audio reply (hard-coded local sample path) and reset the flag
    history.append([None, gr.Audio("/home/yxpeng/Projects/RAGHack/Exodia/voice_sample/trump1.wav")])
    llama_responded = 0
    return history


def create_interface():
    with gr.Blocks() as demo:
        # Title
        gr.Markdown("# Exodia AI Assistant")

        # Audio input section
        with gr.Row():
            audio_input = gr.Audio(
                sources="microphone",
                type="numpy",  # Get audio data and sample rate
                label="Say Something..."
            )
            recognized_text = gr.Textbox(label="Recognized Text", interactive=False)

        # Process audio button
        process_button = gr.Button("Process Audio")

        # Loading indicator shown while the ASR request runs
        loading_animation = gr.HTML(
            value='ASR Model is running...',
            visible=False
        )

        chatbot_custom = gr.Chatbot(height=500)  # Set height to 500 pixels

        # Chat interface using the custom chatbot instance
        chatbot = gr.ChatInterface(
            fn=respond,
            chatbot=chatbot_custom,
            submit_btn="Start Chatting"
        )

        # When the user submits a message, wait for the LLM to finish responding...
        user_start = chatbot.textbox.submit(
            fn=update_response_display,
            inputs=[],
            outputs=[],
        )
        # ...then append the audio reply to the chat history
        user_start.then(
            fn=bot,
            inputs=[chatbot_custom],
            outputs=chatbot_custom,
        )

        # On click: disable the controls, run ASR, then re-enable the controls
        process_button.click(
            fn=disable_components,
            inputs=[],
            outputs=[recognized_text, process_button, loading_animation]
        ).then(
            fn=process_audio,
            inputs=[audio_input],
            outputs=[recognized_text, chatbot.textbox]
        ).then(
            fn=enable_components,
            inputs=[recognized_text],
            outputs=[recognized_text, process_button, loading_animation]
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()