Update app.py
app.py
CHANGED
@@ -1,125 +1,70 @@
 import streamlit as st
-import
-from
-import random
-
-# Set page config
-st.set_page_config(
-    page_title="🧠 Brainrot Chat",
-    page_icon="🧠"
-)

 @st.cache_resource
 def load_model():
-    model =
-        device_map="auto"
-    )
-    tokenizer = AutoTokenizer.from_pretrained("CallmeKaito/llama-3.1-8b-it-brainrot")
     return model, tokenizer

-st.markdown("""
-<style>
-.big-font {
-    font-size: 40px !important;
-    font-weight: bold;
-}
-</style>
-<p class="big-font">🧠 Maximum Brainrot Chat 🤪</p>
-""", unsafe_allow_html=True)

-Please keep it respectful and fun! 🎉
-
-⚠️ For entertainment purposes only - responses are intentionally chaotic! ⚠️
-""")

-# Initialize
 if "messages" not in st.session_state:
-    st.session_state.messages = []
-
-# Load model and tokenizer
-try:
-    with st.spinner("Loading maximum brainrot... 🧠"):
-        model, tokenizer = load_model()
-except Exception as e:
-    st.error(f"Error loading model: {str(e)}")
-    st.stop()

-#
-instruction = """ayoooo, you be Llama, big brain bot built by dem Meta wizards, no cap.
-Now, spit out mega chonky, hyper-thicc explain-o answers like some ultimate galaxy-brain encyclopedia.
-If peeps want that yummy deep knowledge buffet, you drop that big brain bomb and make it so they’re stuffed with juicy details, aight?
-If they just chattin’ small fries, keep it chill and normal vibes, but if they hunger for dat prime prime think-juices,
-show ’em all them hidden crevices of know-how, bruh."""
-
-# Chat interface
 for message in st.session_state.messages:
-    st.

-#
-if prompt := st.chat_input("
-    # Add user message to chat
     st.session_state.messages.append({"role": "user", "content": prompt})
     with st.chat_message("user"):
-        st.

     # Generate response
     with st.chat_message("assistant"):
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        response = response.split("assistant")[-1].strip()
-
-        # Add random internet slang emoji
-        #emojis = ["😳", "💀", "🤪", "✨", "💅", "🔥", "😌", "⭐", "🎯"]
-        #response = f"{response} {random.choice(emojis)}"
-
-        st.write(response)
-        st.session_state.messages.append({"role": "assistant", "content": response})
-
-except Exception as e:
-    st.error(f"Error generating response: {str(e)}")
-
-# Clear chat button
-if st.button("Reset Brainrot 🧠"):
-    st.session_state.messages = []
-    st.experimental_rerun()

-#
-st.
----
-*This chatbot is intentionally unhinged and chaotic for entertainment.
-Responses are AI-generated and should not be taken seriously* ✨
-""")
+# app.py
 import streamlit as st
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel

+# Load model and tokenizer once using caching
 @st.cache_resource
 def load_model():
+    base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
+    model = PeftModel.from_pretrained(base_model, "CallmeKaito/llama-3.2-1b-it-brainrot")
+    tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
+    model = model.to("cuda")
     return model, tokenizer

+model, tokenizer = load_model()

+# System prompt
+system_prompt = "ayoooo, you be Llama, big brain bot built by dem Meta wizards, no cap. Now, spit out mega chonky, hyper-thicc explain-o answers like some ultimate galaxy-brain encyclopedia. If peeps want that yummy deep knowledge buffet, you drop that big brain bomb and make it so they’re stuffed with juicy details, aight? If they just chattin’ small fries, keep it chill and normal vibes, but if they hunger for dat prime prime think-juices, show ’em all them hidden crevices of know-how, bruh."

+# Initialize chat history
 if "messages" not in st.session_state:
+    st.session_state.messages = [{"role": "system", "content": system_prompt}]

+# Display chat messages
 for message in st.session_state.messages:
+    if message["role"] != "system":
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])

+# Chat input
+if prompt := st.chat_input("What's up?"):
+    # Add user message to chat history
     st.session_state.messages.append({"role": "user", "content": prompt})
+
+    # Display user message
     with st.chat_message("user"):
+        st.markdown(prompt)

     # Generate response
     with st.chat_message("assistant"):
+        # Create prompt template
+        messages = st.session_state.messages.copy()
+        messages = [m for m in messages if m["role"] != "system"]  # Remove system prompt from visible history
+
+        chat_prompt = tokenizer.apply_chat_template(
+            [{"role": "system", "content": system_prompt}] + messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+
+        # Tokenize and generate
+        inputs = tokenizer(chat_prompt, return_tensors="pt").to("cuda")
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=500,
+            eos_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            temperature=0.7,
+            top_p=0.9,
+        )
+
+        # Decode response
+        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        response = full_response.split("assistant\n")[-1].strip()
+
+        # Display response
+        st.markdown(response)

+    # Add assistant response to chat history
+    st.session_state.messages.append({"role": "assistant", "content": response})
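The new version pins everything to CUDA: model.to("cuda") in load_model and .to("cuda") on the tokenized inputs, so the app only starts on GPU hardware. Below is a minimal sketch, not part of this commit, of a device-agnostic load_model; it assumes torch is installed alongside transformers and peft and reuses the same repo IDs.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

def load_model():
    # Hypothetical fallback: use CUDA when available, otherwise stay on CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
    model = PeftModel.from_pretrained(base_model, "CallmeKaito/llama-3.2-1b-it-brainrot")
    tokenizer = AutoTokenizer.from_pretrained("unsloth/Llama-3.2-1B-Instruct")
    return model.to(device), tokenizer, device

With this variant, the tokenizer(chat_prompt, return_tensors="pt").to("cuda") call in the generation block would move the inputs to the same device value instead of hard-coding "cuda".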