"""Gradio chat UI for a local DeepSeek-R1 model served by Ollama.

Starts an ``ollama serve`` subprocess, pulls the model, and exposes a
streaming chat interface that separates the model's <think>...</think>
reasoning stream from its final answer.
"""

import subprocess
import time

import gradio as gr
from ollama import chat, ChatResponse

# Ollama model tag to serve and chat with.
model_id = "deepseek-r1:1.5b"

# DeepSeek-R1 streams its chain-of-thought wrapped in these sentinel tokens.
THINK_OPEN = "<think>"
THINK_CLOSE = "</think>"


def interact(message: str, history: list):
    """Stream a model reply, splitting "thinking" output from the answer.

    Args:
        message: The user's latest message.
        history: Prior conversation as role/content message dicts
            (``type="messages"`` format supplied by ``gr.ChatInterface``).

    Yields:
        A two-element list: a ``gr.ChatMessage`` accumulating the
        chain-of-thought (rendered under the "Thinking Cloud" title),
        followed by the accumulated answer text so far.
    """
    # Copy so we never mutate the history list Gradio passed in.
    chat_history = list(history)
    chat_history.append({"role": "user", "content": message})

    response: ChatResponse = chat(
        model=model_id,
        messages=chat_history,
        stream=True,
    )

    text_response = ""
    thinking_response = gr.ChatMessage(
        content="",
        metadata={"title": "Thinking Cloud"},
    )
    thinking = False
    for chunk in response:
        bit = chunk["message"]["content"]
        # BUG FIX: both branches previously compared ``bit`` against ""
        # (the <think> sentinel tags had been lost), so the elif was
        # unreachable and the first empty chunk locked the stream into
        # "thinking" mode. Toggle on the actual DeepSeek-R1 tag tokens.
        if bit == THINK_OPEN:
            thinking = True
            continue
        elif bit == THINK_CLOSE:
            thinking = False
            continue
        if thinking:
            thinking_response.content += bit
        else:
            text_response += bit
        # Yield both messages every chunk so the UI updates live.
        yield [thinking_response, text_response]


interface = gr.ChatInterface(
    fn=interact,
    type="messages",
    title="Deepseek-R1 Chat Interface",
    description="Model: Deepseek R1: 1.5B params",
)

if __name__ == "__main__":
    print("\n\nStarting Ollama...\n\n")
    # Best-effort startup: the sleeps give the server / model pull time
    # to finish before the UI starts issuing requests.
    subprocess.Popen(["ollama", "serve"])
    time.sleep(10)
    print("\n\nOllama started successfully!!\n\n\n\nTesting...\n\n")
    subprocess.run(["ollama", "pull", model_id])
    time.sleep(5)
    print("\n\nDeepseek-R1 started successfully!!\n\n")
    interface.launch(server_name="0.0.0.0", server_port=7860)