import threading
import http.server
import socketserver
import os

import yaml
import torch
import gradio as gr
from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from pyprojroot import here

from utils.upload_file import UploadFile
from utils.chatbot import ChatBot
from utils.ui_settings import UISettings
from utils.load_config import LoadConfig

# Load the app config
with open(here("configs/app_config.yml")) as cfg:
    app_config = yaml.load(cfg, Loader=yaml.FullLoader)

PORT = app_config["serve"]["port"]
DIRECTORY1 = app_config["directories"]["data_directory"]
DIRECTORY2 = app_config["directories"]["data_directory_2"]


# ================================
# Part 1: Reference Serve Code
# ================================
class MultiDirectoryHTTPRequestHandler(http.server.SimpleHTTPRequestHandler):
    """Serve files from multiple directories."""

    def translate_path(self, path):
        # Map URLs whose first segment names one of the data directories onto
        # that directory; otherwise look the requested file up in both
        # directories before falling back to the default behaviour.
        parts = path.split('/', 2)
        if len(parts) > 1:
            first_directory = parts[1]
            if first_directory == os.path.basename(DIRECTORY1):
                path = os.path.join(DIRECTORY1, *parts[2:])
            elif first_directory == os.path.basename(DIRECTORY2):
                path = os.path.join(DIRECTORY2, *parts[2:])
            else:
                file_path1 = os.path.join(DIRECTORY1, first_directory)
                file_path2 = os.path.join(DIRECTORY2, first_directory)
                if os.path.isfile(file_path1):
                    return file_path1
                elif os.path.isfile(file_path2):
                    return file_path2
        return super().translate_path(path)


def start_reference_server():
    with socketserver.TCPServer(("", PORT), MultiDirectoryHTTPRequestHandler) as httpd:
        print(f"Serving at port {PORT}")
        httpd.serve_forever()


# ================================
# Part 2: LLM Serve Code
# ================================
APPCFG = LoadConfig()
app = Flask(__name__)

# Load the tokenizer and the LLM.
# NOTE: the tokenizer is loaded from APPCFG.llm_engine while the model is
# hard-coded to "BioMistral/BioMistral-7B"; both should point at the same
# checkpoint. Tokenizers are device-agnostic, so no device argument is needed.
tokenizer = AutoTokenizer.from_pretrained(
    APPCFG.llm_engine,
    token=APPCFG.gemma_token)
model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path="BioMistral/BioMistral-7B",
    token=APPCFG.gemma_token,
    torch_dtype=torch.float16,
    device_map=APPCFG.device)
app_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer
)


@app.route("/generate_text", methods=["POST"])
def generate_text():
    data = request.json
    # "prompt" is expected to be a list of chat messages
    # ({"role": ..., "content": ...} dicts), which apply_chat_template
    # renders into the model's prompt format.
    prompt = data.get("prompt", "")
    max_new_tokens = data.get("max_new_tokens", 1000)
    do_sample = data.get("do_sample", True)
    temperature = data.get("temperature", 0.1)
    top_k = data.get("top_k", 50)
    top_p = data.get("top_p", 0.95)
    tokenized_prompt = app_pipeline.tokenizer.apply_chat_template(
        prompt, tokenize=False, add_generation_prompt=True)
    outputs = app_pipeline(
        tokenized_prompt,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p
    )
    # Strip the echoed prompt so only the newly generated text is returned.
    return jsonify({"response": outputs[0]["generated_text"][len(tokenized_prompt):]})


def start_llm_server():
    # Keep the debugger/reloader off: Flask runs in a background thread here.
    app.run(debug=False, port=8888)
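# ---------------------------------------------------------------
# Illustrative client call for the /generate_text endpoint above
# (not part of the original code). The payload keys mirror the
# data.get(...) calls in generate_text(); the prompt content and the
# localhost URL are assumptions for demonstration only.
#
#   import requests
#
#   payload = {
#       "prompt": [{"role": "user", "content": "What are common symptoms of anemia?"}],
#       "max_new_tokens": 256,
#       "do_sample": True,
#       "temperature": 0.1,
#       "top_k": 50,
#       "top_p": 0.95,
#   }
#   resp = requests.post("http://localhost:8888/generate_text", json=payload)
#   print(resp.json()["response"])
# ---------------------------------------------------------------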
# ================================
# Part 3: Gradio Chatbot Code
# ================================
def start_gradio_app():
    with gr.Blocks() as demo:
        with gr.Tabs():
            with gr.TabItem("Med-App"):
                # First row: collapsible reference sidebar + chatbot window
                with gr.Row() as row_one:
                    with gr.Column(visible=False) as reference_bar:
                        ref_output = gr.Markdown()
                    with gr.Column() as chatbot_output:
                        chatbot = gr.Chatbot(
                            [],
                            elem_id="chatbot",
                            bubble_full_width=False,
                            height=500,
                            avatar_images=("images/test.png", "images/Gemma-logo.png")
                        )
                        chatbot.like(UISettings.feedback, None, None)
                # Second row: text input
                with gr.Row():
                    input_txt = gr.Textbox(
                        lines=4,
                        scale=8,
                        placeholder="Enter text and press enter, or upload PDF files"
                    )
                # Third row: buttons and RAG mode selector
                with gr.Row() as row_two:
                    text_submit_btn = gr.Button(value="Submit text")
                    # NOTE: not wired to toggle `reference_bar` in this snippet.
                    btn_toggle_sidebar = gr.Button(value="References")
                    upload_btn = gr.UploadButton(
                        "📁 Upload PDF or doc files",
                        file_types=['.pdf', '.doc'],
                        file_count="multiple"
                    )
                    clear_button = gr.ClearButton([input_txt, chatbot])
                    rag_with_dropdown = gr.Dropdown(
                        label="RAG with",
                        choices=["Preprocessed doc", "Upload doc: Process for RAG"],
                        value="Preprocessed doc"
                    )
                # Fourth row: generation settings
                with gr.Row() as row_four:
                    temperature_bar = gr.Slider(
                        minimum=0.1, maximum=1, value=0.1, step=0.1,
                        label="Temperature",
                        info="Increasing the temperature will make the model answer more creatively."
                    )
                    top_k = gr.Slider(
                        minimum=0.0, maximum=100.0, step=1,
                        label="top_k", value=50,
                        info="A lower value (e.g. 10) will result in more conservative answers."
                    )
                    top_p = gr.Slider(
                        minimum=0.0, maximum=1.0, step=0.01,
                        label="top_p", value=0.95,
                        info="A lower value will generate more focused and conservative text."
                    )

                # Process uploaded files
                file_msg = upload_btn.upload(
                    fn=UploadFile.process_uploaded_files,
                    inputs=[upload_btn, chatbot, rag_with_dropdown],
                    outputs=[input_txt, chatbot],
                    queue=False
                )
                # Submit text via the Enter key or the "Submit text" button;
                # both re-enable the textbox once the response is rendered.
                txt_msg = input_txt.submit(
                    fn=ChatBot.respond,
                    inputs=[chatbot, input_txt, rag_with_dropdown, temperature_bar, top_k, top_p],
                    outputs=[input_txt, chatbot, ref_output],
                    queue=False
                ).then(lambda: gr.Textbox(interactive=True), None, [input_txt], queue=False)
                text_submit_btn.click(
                    fn=ChatBot.respond,
                    inputs=[chatbot, input_txt, rag_with_dropdown, temperature_bar, top_k, top_p],
                    outputs=[input_txt, chatbot, ref_output],
                    queue=False
                ).then(lambda: gr.Textbox(interactive=True), None, [input_txt], queue=False)

    demo.launch()


# ================================
# Main: Running all services concurrently
# ================================
if __name__ == "__main__":
    # Start all services in separate threads
    reference_server_thread = threading.Thread(target=start_reference_server)
    llm_server_thread = threading.Thread(target=start_llm_server)
    gradio_app_thread = threading.Thread(target=start_gradio_app)

    reference_server_thread.start()
    llm_server_thread.start()
    gradio_app_thread.start()

    # Keep the main thread alive until all services exit
    reference_server_thread.join()
    llm_server_thread.join()
    gradio_app_thread.join()
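# ---------------------------------------------------------------
# Sketch of the expected configs/app_config.yml (illustrative only;
# the keys are the ones read at the top of this file, but the example
# values and paths are assumptions, not the project's actual settings):
#
#   serve:
#     port: 8000
#   directories:
#     data_directory: data/docs
#     data_directory_2: data/docs_2
# ---------------------------------------------------------------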