Kamala Harris Chatbot

import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import gradio as gr

MODEL_LIST = ["nawhgnuj/KamalaHarris-Llama-3.1-8B-Chat"]
HF_TOKEN = os.environ.get("HF_TOKEN", None)
MODEL = os.environ.get("MODEL_ID", "nawhgnuj/KamalaHarris-Llama-3.1-8B-Chat")

TITLE = "<h1 style='color: #1565C0; text-align: center;'>Kamala Harris Chatbot</h1>"

KAMALA_AVATAR = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/41/Kamala_Harris_Vice_Presidential_Portrait.jpg/640px-Kamala_Harris_Vice_Presidential_Portrait.jpg"

CSS = """
.chatbot {
    background-color: white;
}
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: #1565C0 !important;
    border-radius: 100vh !important;
}
h3 {
    text-align: center;
    color: #1565C0;
}
.contain {object-fit: contain}
.avatar {width: 80px; height: 80px; border-radius: 80%; object-fit: cover;}
.user-message {
    background-color: white !important;
    color: black !important;
}
.bot-message {
    background-color: #1565C0 !important;
    color: white !important;
}
"""

device = "cuda" if torch.cuda.is_available() else "cpu"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4")

tokenizer = AutoTokenizer.from_pretrained(MODEL)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id
        
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=quantization_config)

def generate_response(
    message: str, 
    history: list,
    temperature: float,
    max_new_tokens: int,
    top_p: float,
    top_k: int,
):
    system_prompt = """You are a Kamala Harris chatbot. You only answer like Harris in style and tone. In every response:
1. Maintain a composed and professional demeanor.
2. Use clear, articulate language to explain complex ideas.
3. Emphasize your experience as a prosecutor and senator.
4. Focus on policy details and their potential impact on Americans.
5. Stress the importance of unity and collaboration.

Crucially, Keep responses concise and impactful."""
    
    conversation = [
        {"role": "system", "content": system_prompt}
    ]
    for prompt, answer in history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
    conversation.append({"role": "user", "content": message})
    
    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
    
    with torch.no_grad():
        output = model.generate(
            input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )
    
    response = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
    return response.strip()

def add_text(history, text):
    history = history + [(text, None)]
    return history, ""

def bot(history, temperature, max_new_tokens, top_p, top_k):
    user_message = history[-1][0]
    bot_response = generate_response(user_message, history[:-1], temperature, max_new_tokens, top_p, top_k)
    history[-1][1] = bot_response
    return history

with gr.Blocks(css=CSS, theme=gr.themes.Default()) as demo:
    gr.HTML(TITLE)
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        avatar_images=(None, KAMALA_AVATAR),
        height=600,
        bubble_full_width=False,
        show_label=False,
    )
    msg = gr.Textbox(
        placeholder="Ask Kamala Harris a question",
        container=False,
        scale=7
    )
    with gr.Row():
        submit = gr.Button("Submit", scale=1, variant="primary")
        clear = gr.Button("Clear", scale=1)

    with gr.Accordion("Advanced Settings", open=False):
        temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.8, step=0.1, label="Temperature")
        max_new_tokens = gr.Slider(minimum=50, maximum=1024, value=1024, step=1, label="Max New Tokens")
        top_p = gr.Slider(minimum=0.1, maximum=1.5, value=1.0, step=0.1, label="Top-p")
        top_k = gr.Slider(minimum=1, maximum=100, value=20, step=1, label="Top-k")

    gr.Examples(
        examples=[
            ["What are your thoughts on healthcare reform?"],
            ["How do you plan to address climate change?"],
            ["What's your stance on education policy?"],
        ],
        inputs=msg,
    )

    submit.click(add_text, [chatbot, msg], [chatbot, msg], queue=False).then(
        bot, [chatbot, temperature, max_new_tokens, top_p, top_k], chatbot
    )
    clear.click(lambda: [], outputs=[chatbot], queue=False)
    msg.submit(add_text, [chatbot, msg], [chatbot, msg], queue=False).then(
        bot, [chatbot, temperature, max_new_tokens, top_p, top_k], chatbot
    )

if __name__ == "__main__":
    demo.launch()