import spaces
import gradio as gr
import torch
from gradio import State
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face Hub id of the checkpoint shared by the tokenizer and the model.
MODEL_NAME = "berkeley-nest/Starling-LM-7B-alpha"

# Run on CUDA when torch reports a usable GPU, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Fetch the tokenizer and the weights, then place the model on the chosen device.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
model = model.to(device)
model.eval()  # Inference only: switch the module to evaluation mode

# Turn separator used by the Starling-LM / OpenChat 3.5 prompt template.
_END_OF_TURN = "<|end_of_turn|>"
# Token budget for the prompt (earlier turns plus the new message).
_MAX_PROMPT_TOKENS = 1024
# Token budget for the generated reply, counted separately from the prompt.
_MAX_NEW_TOKENS = 512


def _build_prompt(turns, user_input):
    """Render earlier (user, assistant) turns plus the new message as one prompt."""
    parts = [
        f"GPT4 Correct User: {user}{_END_OF_TURN}"
        f"GPT4 Correct Assistant: {assistant}{_END_OF_TURN}"
        for user, assistant in turns
    ]
    parts.append(f"GPT4 Correct User: {user_input}{_END_OF_TURN}GPT4 Correct Assistant:")
    return "".join(parts)


@spaces.GPU
def generate_response(user_input, chat_history):
    """Generate the assistant's reply to ``user_input`` and extend the history.

    Args:
        user_input: The message typed by the user.
        chat_history: Conversation so far as a list of ``[user, assistant]``
            pairs. ``None`` (a fresh ``gr.State()``) and ``""`` (what
            ``clear_chat`` stores) are both treated as an empty conversation.

    Returns:
        A ``(chatbot_value, new_history)`` tuple. Both items are the list of
        ``[user, assistant]`` pairs, which is the value shape ``gr.Chatbot``
        displays. If generation fails, the chatbot value carries an
        "Error occurred: ..." reply and the history is returned unchanged.
    """
    turns = list(chat_history) if isinstance(chat_history, (list, tuple)) else []

    # Nothing to answer: leave the conversation as it is.
    if not user_input or not user_input.strip():
        return turns, turns

    try:
        # Drop the oldest turns until the prompt fits the token budget, so the
        # most recent context and the trailing assistant marker are preserved.
        context = turns
        while True:
            inputs = tokenizer(_build_prompt(context, user_input), return_tensors="pt")
            if inputs["input_ids"].shape[-1] <= _MAX_PROMPT_TOKENS or not context:
                break
            context = context[1:]

        # A single over-long message can still exceed the budget: keep its tail,
        # then move the tensors to the same device as the model.
        inputs = {k: v[:, -_MAX_PROMPT_TOKENS:].to(device) for k, v in inputs.items()}
        prompt_length = inputs["input_ids"].shape[-1]

        with torch.no_grad():
            output = model.generate(
                **inputs,
                # max_new_tokens budgets the reply only; max_length would also
                # count the prompt and could leave no room to generate.
                max_new_tokens=_MAX_NEW_TOKENS,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,
            )

        # The returned sequence starts with the prompt tokens; decode only the
        # continuation so the reply does not echo the prompt.
        response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True).strip()
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of crashing the
        # event handler, and keep the stored history untouched.
        return turns + [[user_input, f"Error occurred: {e}"]], turns

    turns = turns + [[user_input, response]]
    return turns, turns

# Gradio Interface
def clear_chat():
    """Reset the conversation.

    Returns:
        A ``(chatbot_value, history)`` tuple: an empty message list for the
        chatbot display (its value is a list of message pairs, not a string)
        and an empty string for the stored history.
    """
    return [], ""

# Page layout: header row, chat display, input row, then the event wiring.
with gr.Blocks(gr.themes.Soft()) as app:
    with gr.Row():
        gr.Markdown("## Starling Chatbot")
        gr.Markdown("Run with your own hardware. This application exceeds 24GB VRAM")
        gr.Markdown("```docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all \
	registry.hf.space/macadeliccc-starling-lm-7b-alpha-chat:latest python app.py```")
    with gr.Row():
        chatbot = gr.Chatbot()

    with gr.Row():
        user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
        send = gr.Button("Send")
        clear = gr.Button("Clear")

    # Holds the chat history. Start from an empty string rather than the
    # default None so the handler can extend the history on the first message.
    chat_history = gr.State("")

    # "Send" runs the model; "Clear" resets both the display and the history.
    send.click(generate_response, inputs=[user_input, chat_history], outputs=[chatbot, chat_history])
    clear.click(clear_chat, outputs=[chatbot, chat_history])

app.launch()