# PMB / app.py — Persistent Memory Bot (Hugging Face Space by Sergidev)
# Revision: v4 (commit b9a7bd3), ~5.01 kB
import os
import gradio as gr
import spaces
import json
from modules.pmbl import PMBL
# Initialize the PMBL instance with the Qwen model path.
# NOTE(review): the string looks like a HF Hub repo id for a GGUF build of
# QwQ-32B — how PMBL resolves/downloads it is not visible here; verify in modules.pmbl.
pmbl = PMBL("Qwen/QwQ-32B-GGUF")
# Use a simpler theme approach that works with all Gradio versions.
# Raw CSS injected via gr.Blocks(css=...): dark gradient page background,
# distinct bubble colors for user (.user-message) vs bot (.bot-message),
# and a capped container width. `!important` is used throughout to win
# against Gradio's own component styles.
custom_css = """
body {
font-family: Arial, sans-serif;
margin: 0;
padding: 20px;
background: linear-gradient(to bottom right, #222222, #333333);
color: #f0f8ff;
}
h1 {
text-align: center;
margin-bottom: 20px;
color: #f0f8ff;
text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.5);
}
.gradio-container {
max-width: 900px !important;
}
#chat-container {
border: 1px solid #ccc !important;
border-radius: 5px !important;
background-color: #1e1e1e !important;
}
.user-message {
background-color: #59788E !important;
color: white !important;
border-radius: 5px !important;
padding: 8px !important;
margin: 5px 0 !important;
align-self: flex-end !important;
margin-left: auto !important;
white-space: pre-wrap !important;
}
.bot-message {
background-color: #2c3e4c !important;
color: white !important;
border-radius: 5px !important;
padding: 8px !important;
margin: 5px 0 !important;
align-self: flex-start !important;
margin-right: auto !important;
white-space: pre-wrap !important;
}
.mode-toggle {
margin-bottom: 10px !important;
}
button {
background-color: #59788E !important;
color: white !important;
}
button:hover {
background-color: #45a049 !important;
}
"""
@spaces.GPU(duration=120)
def generate_response(message, history, memory_mode):
    """Stream a model reply for `message`, yielding the growing text.

    Args:
        message: The new user message (str).
        history: Prior turns as [user_text, assistant_text] pairs
            (Gradio tuple-style chatbot history).
        memory_mode: Truthy selects "smart" mode (faster, less context);
            falsy selects "full" mode.

    Yields:
        str: The accumulated response so far (one yield per chunk), or a
        single error message if generation fails.

    Side effects: persists the exchange via `pmbl.save_chat` and runs
    `pmbl.sleep_mode()` (history organization) afterwards.
    """
    # Convert Gradio pairs into the role-tagged format PMBL expects.
    # NOTE(review): the assistant role string "PMB" is PMBL's own
    # convention, not the usual "assistant" — confirm in modules.pmbl.
    formatted_history = []
    for human, assistant in history:
        formatted_history.append({"role": "user", "content": human})
        if assistant:  # skip the trailing placeholder turn (assistant == None/"")
            formatted_history.append({"role": "PMB", "content": assistant})

    response = ""
    mode = "smart" if memory_mode else "full"

    try:
        # Build the context inside the guarded region: a failure in
        # process_history previously escaped uncaught and killed the
        # stream instead of surfacing the friendly error message below.
        history_context = pmbl.process_history(formatted_history, mode, message)
        # Stream the reply chunk by chunk, yielding the running total so
        # the UI can render partial output.
        for chunk in pmbl.generate_streaming_response(message, history_context, mode):
            response += chunk
            yield response
    except Exception as e:
        error_msg = f"I encountered an error while generating a response: {str(e)}"
        yield error_msg
        response = error_msg  # persist the error text as this turn's reply

    # Save the conversation to local history only.
    pmbl.save_chat(message, response)

    # Organize/compact chat history; best-effort — never crash the request.
    try:
        pmbl.sleep_mode()
    except Exception as e:
        print(f"Error in sleep mode: {e}")
def user_input_fn(message, history, memory_mode):
    """Queue the user's turn: clear the textbox and append a pending pair.

    Returns ("", history + [[message, None]]); the None slot is filled in
    later by bot_response_fn. `memory_mode` is accepted to match the event
    signature but is not used here.
    """
    pending_turn = [message, None]
    return "", history + [pending_turn]
def bot_response_fn(history, memory_mode):
    """Fill the last (pending) history slot by streaming the bot's reply.

    Yields the mutated `history` after each chunk so the Chatbot component
    re-renders progressively. If the last turn is not pending (or history
    is empty), the history is yielded once, unchanged.
    """
    # Guard clause: nothing pending to answer.
    if not history or history[-1][1] is not None:
        yield history
        return

    message = history[-1][0]
    history[-1][1] = ""
    try:
        for partial in generate_response(message, history[:-1], memory_mode):
            history[-1][1] = partial
            yield history
    except Exception as err:
        history[-1][1] = f"Error generating response: {str(err)}"
        yield history
# Create the Gradio interface. All component construction AND event wiring
# must live inside the Blocks context manager so Gradio registers them.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1>Persistent Memory Bot</h1>")

    with gr.Row():
        # Toggle between "smart" (fast, shallow memory) and "full" modes;
        # passed through to generate_response via bot_response_fn.
        memory_mode = gr.Checkbox(
            label="Smart Mode (Faster responses but less context memory)",
            value=False,
            elem_classes="mode-toggle"
        )

    chatbot = gr.Chatbot(
        [],
        elem_id="chat-container",  # styled by custom_css
        height=500,
        avatar_images=(None, None),
        bubble_full_width=False
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Enter your message, use the switch for faster responses but less memory. Do not enter sensitive info. Cannot provide financial/legal advice.",
            show_label=False,
            scale=9
        )
        submit_btn = gr.Button("Send", scale=1)

    gr.HTML("<div id='loading-message' style='margin-top: 10px; color: #00ff00; font-style: italic;'>Processing may take up to 2 minutes for initial setup.</div>")

    # Two-step interaction for both Enter and the Send button:
    # 1) user_input_fn (unqueued, instant): clear textbox, append pending turn;
    # 2) bot_response_fn (queued): stream the reply into that turn.
    msg.submit(
        user_input_fn,
        [msg, chatbot, memory_mode],
        [msg, chatbot],
        queue=False
    ).then(
        bot_response_fn,
        [chatbot, memory_mode],
        [chatbot]
    )

    submit_btn.click(
        user_input_fn,
        [msg, chatbot, memory_mode],
        [msg, chatbot],
        queue=False
    ).then(
        bot_response_fn,
        [chatbot, memory_mode],
        [chatbot]
    )

# Launch the app (queue() enables streaming/generator handlers).
demo.queue()
demo.launch()