# PMB / app.py — Persistent Memory Bot (Hugging Face Space by Sergidev)
# Revision: v4 (commit b9a7bd3), ~5.01 kB
import os
import gradio as gr
import spaces
import json
from modules.pmbl import PMBL
# Initialize the PMBL instance with the Qwen model path.
# NOTE(review): the string looks like a HF Hub repo id for a GGUF build of
# QwQ-32B — how PMBL resolves/downloads it is not visible here; verify in modules.pmbl.
pmbl = PMBL("Qwen/QwQ-32B-GGUF")
# Use a simpler theme approach that works with all Gradio versions.
# Raw CSS injected via gr.Blocks(css=...): dark gradient page background,
# distinct bubble colors for user (.user-message) vs bot (.bot-message),
# and a capped container width. `!important` is used throughout to win
# against Gradio's own component styles.
custom_css = """
body {
font-family: Arial, sans-serif;
margin: 0;
padding: 20px;
background: linear-gradient(to bottom right, #222222, #333333);
color: #f0f8ff;
}
h1 {
text-align: center;
margin-bottom: 20px;
color: #f0f8ff;
text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.5);
}
.gradio-container {
max-width: 900px !important;
}
#chat-container {
border: 1px solid #ccc !important;
border-radius: 5px !important;
background-color: #1e1e1e !important;
}
.user-message {
background-color: #59788E !important;
color: white !important;
border-radius: 5px !important;
padding: 8px !important;
margin: 5px 0 !important;
align-self: flex-end !important;
margin-left: auto !important;
white-space: pre-wrap !important;
}
.bot-message {
background-color: #2c3e4c !important;
color: white !important;
border-radius: 5px !important;
padding: 8px !important;
margin: 5px 0 !important;
align-self: flex-start !important;
margin-right: auto !important;
white-space: pre-wrap !important;
}
.mode-toggle {
margin-bottom: 10px !important;
}
button {
background-color: #59788E !important;
color: white !important;
}
button:hover {
background-color: #45a049 !important;
}
"""
@spaces.GPU(duration=120)
def generate_response(message, history, memory_mode):
    """Stream a model reply for `message`, yielding the growing text.

    Args:
        message: The new user message (str).
        history: Prior turns as [user_text, assistant_text] pairs
            (Gradio tuple-style chatbot history).
        memory_mode: Truthy selects "smart" mode (faster, less context);
            falsy selects "full" mode.

    Yields:
        str: The accumulated response so far (one yield per chunk), or a
        single error message if generation fails.

    Side effects: persists the exchange via `pmbl.save_chat` and runs
    `pmbl.sleep_mode()` (history organization) afterwards.
    """
    # Convert Gradio pairs into the role-tagged format PMBL expects.
    # NOTE(review): the assistant role string "PMB" is PMBL's own
    # convention, not the usual "assistant" — confirm in modules.pmbl.
    formatted_history = []
    for human, assistant in history:
        formatted_history.append({"role": "user", "content": human})
        if assistant:  # skip the trailing placeholder turn (assistant == None/"")
            formatted_history.append({"role": "PMB", "content": assistant})

    response = ""
    mode = "smart" if memory_mode else "full"

    try:
        # Build the context inside the guarded region: a failure in
        # process_history previously escaped uncaught and killed the
        # stream instead of surfacing the friendly error message below.
        history_context = pmbl.process_history(formatted_history, mode, message)
        # Stream the reply chunk by chunk, yielding the running total so
        # the UI can render partial output.
        for chunk in pmbl.generate_streaming_response(message, history_context, mode):
            response += chunk
            yield response
    except Exception as e:
        error_msg = f"I encountered an error while generating a response: {str(e)}"
        yield error_msg
        response = error_msg  # persist the error text as this turn's reply

    # Save the conversation to local history only.
    pmbl.save_chat(message, response)

    # Organize/compact chat history; best-effort — never crash the request.
    try:
        pmbl.sleep_mode()
    except Exception as e:
        print(f"Error in sleep mode: {e}")
def user_input_fn(message, history, memory_mode):
    """Queue the user's turn: clear the textbox and append a pending pair.

    Returns ("", history + [[message, None]]); the None slot is filled in
    later by bot_response_fn. `memory_mode` is accepted to match the event
    signature but is not used here.
    """
    pending_turn = [message, None]
    return "", history + [pending_turn]
def bot_response_fn(history, memory_mode):
    """Fill the last (pending) history slot by streaming the bot's reply.

    Yields the mutated `history` after each chunk so the Chatbot component
    re-renders progressively. If the last turn is not pending (or history
    is empty), the history is yielded once, unchanged.
    """
    # Guard clause: nothing pending to answer.
    if not history or history[-1][1] is not None:
        yield history
        return

    message = history[-1][0]
    history[-1][1] = ""
    try:
        for partial in generate_response(message, history[:-1], memory_mode):
            history[-1][1] = partial
            yield history
    except Exception as err:
        history[-1][1] = f"Error generating response: {str(err)}"
        yield history
# Create the Gradio interface. All component construction AND event wiring
# must live inside the Blocks context manager so Gradio registers them.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1>Persistent Memory Bot</h1>")

    with gr.Row():
        # Toggle between "smart" (fast, shallow memory) and "full" modes;
        # passed through to generate_response via bot_response_fn.
        memory_mode = gr.Checkbox(
            label="Smart Mode (Faster responses but less context memory)",
            value=False,
            elem_classes="mode-toggle"
        )

    chatbot = gr.Chatbot(
        [],
        elem_id="chat-container",  # styled by custom_css
        height=500,
        avatar_images=(None, None),
        bubble_full_width=False
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Enter your message, use the switch for faster responses but less memory. Do not enter sensitive info. Cannot provide financial/legal advice.",
            show_label=False,
            scale=9
        )
        submit_btn = gr.Button("Send", scale=1)

    gr.HTML("<div id='loading-message' style='margin-top: 10px; color: #00ff00; font-style: italic;'>Processing may take up to 2 minutes for initial setup.</div>")

    # Two-step interaction for both Enter and the Send button:
    # 1) user_input_fn (unqueued, instant): clear textbox, append pending turn;
    # 2) bot_response_fn (queued): stream the reply into that turn.
    msg.submit(
        user_input_fn,
        [msg, chatbot, memory_mode],
        [msg, chatbot],
        queue=False
    ).then(
        bot_response_fn,
        [chatbot, memory_mode],
        [chatbot]
    )

    submit_btn.click(
        user_input_fn,
        [msg, chatbot, memory_mode],
        [msg, chatbot],
        queue=False
    ).then(
        bot_response_fn,
        [chatbot, memory_mode],
        [chatbot]
    )

# Launch the app (queue() enables streaming/generator handlers).
demo.queue()
demo.launch()