import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import gradio as gr
# Module-level configuration for the chat demo.

# Known model checkpoints; not referenced elsewhere in the visible code.
MODEL_LIST = ["nawhgnuj/KamalaHarris-Llama-3.1-8B-Chat"]

# Hugging Face access token taken from the environment (None when unset).
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Checkpoint to load; can be overridden through the MODEL_ID environment variable.
MODEL = os.environ.get("MODEL_ID", "nawhgnuj/KamalaHarris-Llama-3.1-8B-Chat")

# Page heading passed to gr.HTML. It must be triple-quoted: a single-quoted
# string literal cannot span several lines and is a SyntaxError.
TITLE = """
Kamala Harris Chatbot
"""

# Avatar image shown next to the bot's messages.
KAMALA_AVATAR = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/41/Kamala_Harris_Vice_Presidential_Portrait.jpg/640px-Kamala_Harris_Vice_Presidential_Portrait.jpg"

# Custom stylesheet handed to gr.Blocks(css=...).
CSS = """
.chatbot {
background-color: white;
}
.duplicate-button {
margin: auto !important;
color: white !important;
background: #1565C0 !important;
border-radius: 100vh !important;
}
h3 {
text-align: center;
color: #1565C0;
}
.contain {object-fit: contain}
.avatar {width: 80px; height: 80px; border-radius: 80%; object-fit: cover;}
.user-message {
background-color: white !important;
color: black !important;
}
.bot-message {
background-color: #1565C0 !important;
color: white !important;
}
"""
# Model and tokenizer are loaded once at import time and shared by all requests.

# "cuda" when a GPU is visible to torch, otherwise "cpu". Placement of the
# model itself is delegated to device_map="auto" below.
device = "cuda" if torch.cuda.is_available() else "cpu"

# 4-bit NF4 weights with double quantization; compute runs in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

# HF_TOKEN is passed explicitly so that the token read from the environment
# is actually used for authentication (None falls back to the default lookup).
tokenizer = AutoTokenizer.from_pretrained(MODEL, token=HF_TOKEN)
if tokenizer.pad_token is None:
    # No dedicated padding token: reuse EOS so generate() has a pad id.
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    quantization_config=quantization_config,
    token=HF_TOKEN,
)
def _build_conversation(message, history, system_prompt):
    """Assemble the chat-template message list for one generation request."""
    conversation = [{"role": "system", "content": system_prompt}]
    for prompt, answer in history:
        # A turn whose generation failed keeps a None answer in the chat
        # history; feeding None content to the chat template would break the
        # prompt, so incomplete turns are left out.
        if prompt is None or answer is None:
            continue
        conversation.append({"role": "user", "content": prompt})
        conversation.append({"role": "assistant", "content": answer})
    conversation.append({"role": "user", "content": message})
    return conversation


def generate_response(
    message: str,
    history: list,
    temperature: float,
    max_new_tokens: int,
    top_p: float,
    top_k: int,
) -> str:
    """Sample one assistant reply to ``message`` from the module-level model.

    Args:
        message: The new user message to answer.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
            Pairs with a missing side are ignored.
        temperature: Sampling temperature forwarded to ``model.generate``.
        max_new_tokens: Upper bound on generated tokens (coerced to ``int``).
        top_p: Nucleus sampling mass; values above 1.0 are clamped to 1.0.
        top_k: Top-k cutoff (coerced to ``int``).

    Returns:
        The decoded reply with special tokens removed and surrounding
        whitespace stripped.
    """
    system_prompt = """You are a Kamala Harris chatbot. You only answer like Harris in style and tone. In every response:
1. Maintain a composed and professional demeanor.
2. Use clear, articulate language to explain complex ideas.
3. Emphasize your experience as a prosecutor and senator.
4. Focus on policy details and their potential impact on Americans.
5. Stress the importance of unity and collaboration.
Crucially, Keep responses concise and impactful."""
    conversation = _build_conversation(message, history, system_prompt)

    # return_dict=True also yields the attention mask; passing it matters
    # because the pad token may be the EOS token, in which case generate()
    # cannot infer the mask from the ids alone.
    inputs = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            # UI sliders may deliver floats; these two must be integers.
            max_new_tokens=int(max_new_tokens),
            do_sample=True,
            # top_p is a probability mass and is rejected above 1.0.
            top_p=min(float(top_p), 1.0),
            top_k=int(top_k),
            temperature=temperature,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Drop the prompt tokens so only the newly generated text is decoded.
    response = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)
    return response.strip()
def add_text(history, text):
    """Queue a user message as a new, unanswered chat turn.

    Returns a new history list ending with ``(text, None)`` together with an
    empty string, which the caller uses to clear the input box. The list
    passed in is not modified.
    """
    pending_turn = (text, None)
    updated_history = history + [pending_turn]
    cleared_input = ""
    return updated_history, cleared_input
def bot(history, temperature, max_new_tokens, top_p, top_k):
    """Fill in the assistant reply for the last (pending) chat turn.

    Args:
        history: Chat turns; the last one holds the user message to answer.
        temperature, max_new_tokens, top_p, top_k: Sampling settings that are
            forwarded unchanged to ``generate_response``.

    Returns:
        The same ``history`` list with its last turn replaced by
        ``[user_message, bot_response]``. An empty history is returned as is.
    """
    if not history:
        # Nothing to answer (e.g. the chat was cleared in the meantime).
        return history

    user_message = history[-1][0]
    bot_response = generate_response(
        user_message, history[:-1], temperature, max_new_tokens, top_p, top_k
    )
    # Replace the turn instead of assigning to history[-1][1]: pending turns
    # are created as tuples, which do not support item assignment.
    history[-1] = [user_message, bot_response]
    return history
# Gradio UI: heading, chat window, input box, buttons, sampling controls and
# example prompts, followed by the event wiring.
with gr.Blocks(css=CSS, theme=gr.themes.Default()) as demo:
    gr.HTML(TITLE)
    chatbot = gr.Chatbot(
        [],
        elem_id="chatbot",
        avatar_images=(None, KAMALA_AVATAR),
        height=600,
        bubble_full_width=False,
        show_label=False,
    )
    msg = gr.Textbox(
        placeholder="Ask Kamala Harris a question",
        container=False,
        scale=7,
    )
    with gr.Row():
        submit = gr.Button("Submit", scale=1, variant="primary")
        clear = gr.Button("Clear", scale=1)
    with gr.Accordion("Advanced Settings", open=False):
        temperature = gr.Slider(minimum=0.1, maximum=1.5, value=0.8, step=0.1, label="Temperature")
        max_new_tokens = gr.Slider(minimum=50, maximum=1024, value=1024, step=1, label="Max New Tokens")
        # top_p is a cumulative probability mass, so 1.0 is the largest valid
        # value; anything higher is rejected by the sampler.
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=1.0, step=0.1, label="Top-p")
        top_k = gr.Slider(minimum=1, maximum=100, value=20, step=1, label="Top-k")
    gr.Examples(
        examples=[
            ["What are your thoughts on healthcare reform?"],
            ["How do you plan to address climate change?"],
            ["What's your stance on education policy?"],
        ],
        inputs=msg,
    )

    # Clicking Submit and pressing Enter in the textbox run the same two-step
    # chain: add_text appends the pending turn and clears the box, then bot
    # fills in the reply.
    bot_inputs = [chatbot, temperature, max_new_tokens, top_p, top_k]
    for trigger in (submit.click, msg.submit):
        trigger(add_text, [chatbot, msg], [chatbot, msg], queue=False).then(
            bot, bot_inputs, chatbot
        )
    # Clear resets the chat window to an empty history.
    clear.click(lambda: [], outputs=[chatbot], queue=False)

if __name__ == "__main__":
    demo.launch()