import gradio as gr
import time
import requests
import json
import os

# Model identifier. NOTE(review): not referenced anywhere in the visible code,
# and it is not sent in the request payload — confirm whether it is needed.
MODEL = "gpt-4-0125-preview"

# Endpoint base URL and bearer token are supplied through the environment.
API_URL = os.getenv("API_URL")
API_KEY = os.getenv("API_KEY")

print(f"API_URL: {API_URL}")
# Never echo the secret itself: startup output usually ends up in logs.
print(f"API_KEY: {'<set>' if API_KEY else '<not set>'}")

# Drop any trailing slash from the base URL so the joined path has no "//".
url = f"{(API_URL or '').rstrip('/')}/v1/chat/completions"

# The headers for the HTTP request
headers = {
    "accept": "application/json",
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}",
}


def is_valid_json(data):
    """Try to decode *data* as JSON.

    Returns a two-item tuple: ``(True, decoded_value)`` when decoding
    succeeds, or ``(False, error_message)`` when ``json.loads`` raises
    ``ValueError``.
    """
    try:
        decoded = json.loads(data)
    except ValueError as err:
        # Report the decoder's message instead of propagating the exception.
        return False, str(err)
    else:
        return True, decoded


# Build the Gradio UI: a header, the chat window, the message box, a clear
# button, and a row of generation-parameter controls. Components are created
# in display order, so the statement order below is significant.
with gr.Blocks() as demo:

    # Header text shown above the chat.
    # NOTE(review): the text names a Mistral 7B model while the MODEL constant
    # at the top of the file names a different one — confirm which is served.
    markup = gr.Markdown(
        """
                         # Mistral 7B Instruct v0.2
                         This is a demo of the Mistral 7B Instruct quantized model in GGUF (Q2) hosted on K8s cluster.

                         The original models can be found [MaziyarPanahi/Mistral-7B-Instruct-v0.2-GGUF](https://huggingface.co/MaziyarPanahi/Mistral-7B-Instruct-v0.2-GGUF)"""
    )
    # Conversation display, single-line user input, and a button to reset the chat.
    chatbot = gr.Chatbot()
    msg = gr.Textbox(lines=1, label="User Message")
    clear = gr.Button("Clear")
    with gr.Row():

        # First column: system prompt, temperature and output-length controls.
        with gr.Column(scale=2):
            # These values are passed to the bot handler on every request.
            system_prompt_input = gr.Textbox(
                label="System Prompt",
                placeholder="Type system prompt here...",
                value="You are a helpful assistant.",
            )
            temperature_input = gr.Slider(
                label="Temperature", minimum=0.0, maximum=1.0, value=0.9, step=0.01
            )
            max_new_tokens_input = gr.Slider(
                label="Max New Tokens", minimum=0, maximum=1024, value=256, step=1
            )

        # Second column: sampling controls (top-p, top-k, repetition penalty).
        with gr.Column(scale=2):
            top_p_input = gr.Slider(
                label="Top P", minimum=0.0, maximum=1.0, value=0.95, step=0.01
            )
            top_k_input = gr.Slider(
                label="Top K", minimum=1, maximum=100, value=50, step=1
            )
            repetition_penalty_input = gr.Slider(
                label="Repetition Penalty",
                minimum=1.0,
                maximum=2.0,
                value=1.1,
                step=0.01,
            )

    def update_globals(
        system_prompt, temperature, max_new_tokens, top_p, top_k, repetition_penalty
    ):
        """Store the given generation settings in module-level ``global_*`` names.

        Each argument is written to the global of the matching name
        (``global_system_prompt``, ``global_temperature``, ...). Returns None.
        """
        global global_system_prompt, global_temperature, global_max_new_tokens
        global global_top_p, global_top_k, global_repetition_penalty
        (
            global_system_prompt,
            global_temperature,
            global_max_new_tokens,
            global_top_p,
            global_top_k,
            global_repetition_penalty,
        ) = (
            system_prompt,
            temperature,
            max_new_tokens,
            top_p,
            top_k,
            repetition_penalty,
        )

    def user(user_message, history):
        """Append the submitted message to the chat history as a pending turn.

        Args:
            user_message: Text the user submitted.
            history: Existing list of ``[user, assistant]`` pairs. ``None`` is
                treated as an empty history (the Clear button writes ``None``
                to the chatbot).

        Returns:
            A ``("", new_history)`` tuple: the empty string is routed to the
            textbox, and ``new_history`` is a new list ending with
            ``[user_message, None]``. The input list is not modified.
        """
        return "", list(history or []) + [[user_message, None]]

    def _history_to_messages(history, system_prompt):
        """Build the chat-completion ``messages`` list from chatbot history.

        The system prompt (or a default when it is empty) comes first, followed
        by every non-empty user and assistant entry in turn order.
        """
        messages = [
            {
                "content": (
                    system_prompt if system_prompt else "You are a helpful assistant."
                ),
                "role": "system",
            }
        ]
        for turn in history:
            if turn[0]:
                messages.append({"content": turn[0], "role": "user"})
            # Include earlier replies so the model sees the whole conversation;
            # the pending turn has an empty reply and is skipped here.
            if turn[1]:
                messages.append({"content": turn[1], "role": "assistant"})
        return messages

    def _delta_from_line(line):
        """Return the text delta carried by one raw streamed line, or ""."""
        text = line.decode("utf-8").strip()
        # Remove the literal "data:" prefix. str.lstrip("data: ") would strip a
        # *set of characters*, not the prefix.
        if text.startswith("data:"):
            text = text[len("data:"):].strip()
        is_json, chunk = is_valid_json(text)
        # Non-JSON lines (for example an end-of-stream marker) carry no text.
        if not is_json or not isinstance(chunk, dict):
            return ""
        choices = chunk.get("choices") or [{}]
        first_choice = choices[0]
        if not isinstance(first_choice, dict):
            return ""
        delta = first_choice.get("delta") or {}
        if not isinstance(delta, dict):
            return ""
        content = delta.get("content")
        return content if isinstance(content, str) else ""

    def bot(
        history,
        system_prompt,
        temperature,
        max_new_tokens,
        top_p,
        top_k,
        repetition_penalty,
    ):
        """Stream the assistant's reply for the latest turn in ``history``.

        Sends the conversation to the chat-completions endpoint with
        ``stream=True`` and appends each received text delta to the last
        history entry, yielding the updated history after every delta.

        Args:
            history: List of ``[user, assistant]`` pairs; the last pair is the
                turn being answered and its reply slot is overwritten.
            system_prompt: System message; a default is used when it is empty.
            temperature: Sampling temperature sent as ``temperature``.
            max_new_tokens: Sent as ``max_tokens``.
            top_p: Sent as ``top_p``.
            top_k: Sent as ``top_k``.
            repetition_penalty: Sent as ``repeat_penalty``.

        Yields:
            The same ``history`` list, mutated in place, after each delta.
            Nothing is yielded when ``history`` is empty or ``None``.

        Raises:
            requests.RequestException: If the connection fails or the server
                answers with an HTTP error status.
        """
        print(f"History in bot: {history}")
        print(f"System Prompt: {system_prompt}")
        print(f"Temperature: {temperature}")
        print(f"Max New Tokens: {max_new_tokens}")
        print(f"Top P: {top_p}")
        print(f"Top K: {top_k}")
        print(f"Repetition Penalty: {repetition_penalty}")

        # Nothing to answer: avoid indexing into an empty history.
        if not history:
            return

        # Reset the pending reply first so it is never sent as an assistant
        # message and so deltas can be appended with +=.
        history[-1][1] = ""
        history_messages = _history_to_messages(history, system_prompt)
        print(history_messages)

        payload = {
            "messages": history_messages,
            "stream": True,
            # Correctly spelled key; the misspelled one meant the slider value
            # was never applied.
            "temperature": temperature,
            "top_k": top_k,
            "top_p": top_p,
            "seed": 42,
            "repeat_penalty": repetition_penalty,
            "chat_format": "mistral-instruct",
            "max_tokens": max_new_tokens,
            # NOTE(review): this asks the server for JSON-object output on
            # every chat reply — confirm that is intended for a chat demo.
            "response_format": {
                "type": "json_object",
            },
        }

        # The context manager closes the streamed connection even if the
        # consumer stops iterating early. Only the connect phase is bounded
        # (10 s); reads may wait as long as generation takes.
        with requests.post(
            url,
            headers=headers,
            data=json.dumps(payload),
            stream=True,
            timeout=(10, None),
        ) as response:
            # Surface HTTP errors instead of silently producing no reply.
            response.raise_for_status()
            for line in response.iter_lines():
                # Filter out keep-alive new lines
                if not line:
                    continue
                delta_content = _delta_from_line(line)
                if not delta_content:
                    continue
                history[-1][1] += delta_content
                # Short pause between UI updates, kept from the original.
                time.sleep(0.05)
                yield history

    # Values handed to the bot handler, in the order of its parameters.
    bot_inputs = [
        chatbot,
        system_prompt_input,
        temperature_input,
        max_new_tokens_input,
        top_p_input,
        top_k_input,
        repetition_penalty_input,
    ]

    # Step 1: on submit, record the user's turn and write "" to the textbox.
    submit_event = msg.submit(
        fn=user,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
        concurrency_limit=5,
    )
    # Step 2: afterwards, stream the bot's reply into the chatbot.
    submit_event.then(fn=bot, inputs=bot_inputs, outputs=chatbot)

    # The Clear button writes None to the chatbot component.
    clear.click(fn=lambda: None, inputs=None, outputs=chatbot, queue=False)


# Enable the event queue. This runs at import time, outside the __main__ guard
# below, so it also applies when the module is imported by another entry point.
demo.queue()
if __name__ == "__main__":
    # Start the server only when run as a script, with show_api and share
    # both turned off.
    demo.launch(show_api=False, share=False)