File size: 5,005 Bytes
ac0eff7 17c173b ac0eff7 17c173b ac0eff7 17c173b b9a7bd3 ac0eff7 17c173b 623e38b ac0eff7 623e38b ac0eff7 623e38b ac0eff7 b9a7bd3 ac0eff7 f6fb494 6862403 ac0eff7 6862403 b9a7bd3 ac0eff7 b9a7bd3 ac0eff7 17c173b ac0eff7 17c173b ac0eff7 623e38b 17c173b ac0eff7 17c173b ac0eff7 17c173b ac0eff7 17c173b b9a7bd3 ac0eff7 17c173b ac0eff7 17c173b ac0eff7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
import os
import gradio as gr
import spaces
import json
from modules.pmbl import PMBL
# Create the single module-level PMBL instance used by generate_response()
# below. It is constructed once, at import time, with the model identifier
# "Qwen/QwQ-32B-GGUF".
# NOTE(review): the identifier looks like a Hugging Face repo id rather than a
# local filesystem path -- confirm how PMBL resolves it.
pmbl = PMBL("Qwen/QwQ-32B-GGUF")
# Custom stylesheet passed to gr.Blocks(css=...) instead of a Gradio theme
# object; the original author chose plain CSS so the styling would work across
# Gradio versions.
# It sets a dark gradient page background, centers the <h1>, caps
# .gradio-container at 900px, and styles #chat-container (the elem_id given to
# the Chatbot), .mode-toggle (the elem_classes of the Smart Mode checkbox) and
# every <button>.
# NOTE(review): .user-message and .bot-message are not attached to any
# component in this file -- confirm they match markup Gradio actually renders.
custom_css = """
body {
font-family: Arial, sans-serif;
margin: 0;
padding: 20px;
background: linear-gradient(to bottom right, #222222, #333333);
color: #f0f8ff;
}
h1 {
text-align: center;
margin-bottom: 20px;
color: #f0f8ff;
text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.5);
}
.gradio-container {
max-width: 900px !important;
}
#chat-container {
border: 1px solid #ccc !important;
border-radius: 5px !important;
background-color: #1e1e1e !important;
}
.user-message {
background-color: #59788E !important;
color: white !important;
border-radius: 5px !important;
padding: 8px !important;
margin: 5px 0 !important;
align-self: flex-end !important;
margin-left: auto !important;
white-space: pre-wrap !important;
}
.bot-message {
background-color: #2c3e4c !important;
color: white !important;
border-radius: 5px !important;
padding: 8px !important;
margin: 5px 0 !important;
align-self: flex-start !important;
margin-right: auto !important;
white-space: pre-wrap !important;
}
.mode-toggle {
margin-bottom: 10px !important;
}
button {
background-color: #59788E !important;
color: white !important;
}
button:hover {
background-color: #45a049 !important;
}
"""
@spaces.GPU(duration=120)
def generate_response(message, history, memory_mode):
    """Stream the model's reply to ``message``, then persist the exchange.

    This is a generator. Every yielded value is the whole reply accumulated
    so far (not just the newest chunk), so a caller can simply overwrite the
    displayed text on each iteration. The function is wrapped with
    ``spaces.GPU(duration=120)`` for ZeroGPU support.

    Args:
        message: The user's latest message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs. A
            falsy ``assistant_text`` (``None`` or ``""``) is skipped.
        memory_mode: Truthy selects the ``"smart"`` mode, falsy ``"full"``.

    Yields:
        The reply text accumulated so far. If generation raises, a single
        error message is yielded instead and becomes the saved reply.

    Raises:
        Whatever ``pmbl.process_history`` raises; that call is intentionally
        left unguarded, as in the original. Failures while saving or
        reorganising the chat are logged with ``print`` and never raised.
    """
    # Convert the pair-style history into role-tagged messages. Assistant
    # turns are tagged with the role "PMB", which is what PMBL is handed.
    formatted_history = []
    for human, assistant in history:
        formatted_history.append({"role": "user", "content": human})
        if assistant:  # a pending or empty bot turn contributes nothing
            formatted_history.append({"role": "PMB", "content": assistant})

    mode = "smart" if memory_mode else "full"

    # Process history in the PMBL module
    history_context = pmbl.process_history(formatted_history, mode, message)

    response = ""
    try:
        # Accumulate chunks and re-yield the running text after each one.
        for chunk in pmbl.generate_streaming_response(message, history_context, mode):
            response += chunk
            yield response
    except Exception as e:
        # Surface the failure to the user instead of breaking the stream; the
        # error text also replaces any partial reply for saving below.
        error_msg = f"I encountered an error while generating a response: {str(e)}"
        yield error_msg
        response = error_msg

    # Save the conversation to local history only. The reply has already been
    # streamed at this point, so a persistence failure must not propagate:
    # the caller would otherwise replace the delivered reply with an error.
    try:
        pmbl.save_chat(message, response)
    except Exception as e:
        print(f"Error saving chat: {e}")
    else:
        # Process and organize chat history (only after a successful save,
        # matching the original ordering).
        try:
            pmbl.sleep_mode()
        except Exception as e:
            print(f"Error in sleep mode: {e}")
def user_input_fn(message, history, memory_mode):
    """Queue the user's message in the chat history and clear the textbox.

    Args:
        message: Raw text from the input box.
        history: Current chat history as ``[user_text, bot_text]`` pairs.
            ``None`` is treated as an empty history.
        memory_mode: Unused here; accepted because the event wiring passes
            the checkbox value to this callback.

    Returns:
        A ``(textbox_value, history)`` tuple. ``textbox_value`` is always
        ``""`` so the input box is cleared. ``history`` is a new list with
        ``[message, None]`` appended (``None`` marks the reply as pending),
        or the existing history unchanged when ``message`` is blank.
    """
    history = history or []

    # A blank or whitespace-only submission has nothing to answer; adding a
    # pending turn for it would trigger a pointless model call.
    if not message or not message.strip():
        return "", history

    # Build a new list rather than mutating the caller's history in place.
    return "", history + [[message, None]]
def bot_response_fn(history, memory_mode):
    """Stream the bot's reply into the last, still-pending history row.

    A row is pending when its second element is ``None``. If there is no
    pending row (or no history at all), the history is yielded once,
    unchanged. Otherwise the row's reply slot is overwritten with each partial
    reply produced by ``generate_response`` and the history is yielded after
    every update. Any exception raised while generating is converted into an
    error message shown in the reply slot.

    Args:
        history: Chat history as mutable ``[user_text, bot_text]`` rows; the
            last row is modified in place.
        memory_mode: Forwarded unchanged to ``generate_response``.

    Yields:
        The (same) history object after each update.
    """
    awaiting_reply = bool(history) and history[-1][1] is None
    if not awaiting_reply:
        yield history
        return

    pending = history[-1]
    message = pending[0]
    pending[1] = ""  # replace the None marker before streaming starts
    try:
        # Only the turns before the pending one are passed as prior context.
        for partial in generate_response(message, history[:-1], memory_mode):
            pending[1] = partial
            yield history
    except Exception as e:
        pending[1] = f"Error generating response: {str(e)}"
        yield history
# Build the Gradio UI: a title, the Smart Mode toggle, the chat window, an
# input row (textbox + Send button) and a static notice about startup time.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1>Persistent Memory Bot</h1>")

    with gr.Row():
        memory_mode = gr.Checkbox(
            label="Smart Mode (Faster responses but less context memory)",
            value=False,
            elem_classes="mode-toggle",
        )

    chatbot = gr.Chatbot(
        [],
        elem_id="chat-container",
        height=500,
        avatar_images=(None, None),
        bubble_full_width=False,
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Enter your message, use the switch for faster responses but less memory. Do not enter sensitive info. Cannot provide financial/legal advice.",
            show_label=False,
            scale=9,
        )
        submit_btn = gr.Button("Send", scale=1)

    gr.HTML("<div id='loading-message' style='margin-top: 10px; color: #00ff00; font-style: italic;'>Processing may take up to 2 minutes for initial setup.</div>")

    # Pressing Enter in the textbox and clicking Send run the same two-step
    # chain: first record the user's message and clear the textbox (not
    # queued), then stream the bot's reply into the chat window.
    for trigger in (msg.submit, submit_btn.click):
        trigger(
            fn=user_input_fn,
            inputs=[msg, chatbot, memory_mode],
            outputs=[msg, chatbot],
            queue=False,
        ).then(
            fn=bot_response_fn,
            inputs=[chatbot, memory_mode],
            outputs=[chatbot],
        )

# Enable the request queue, then start the app.
demo.queue()
demo.launch()
|