"""Gradio chat front end for the Persistent Memory Bot (PMBL)."""

import os
import gradio as gr
import spaces
import json
from modules.pmbl import PMBL

# Initialize the PMBL instance with the Qwen model path
pmbl = PMBL("Qwen/QwQ-32B-GGUF")

# Use a simpler theme approach that works with all Gradio versions
custom_css = """
body {
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 20px;
    background: linear-gradient(to bottom right, #222222, #333333);
    color: #f0f8ff;
}
h1 {
    text-align: center;
    margin-bottom: 20px;
    color: #f0f8ff;
    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.5);
}
.gradio-container {
    max-width: 900px !important;
}
#chat-container {
    border: 1px solid #ccc !important;
    border-radius: 5px !important;
    background-color: #1e1e1e !important;
}
.user-message {
    background-color: #59788E !important;
    color: white !important;
    border-radius: 5px !important;
    padding: 8px !important;
    margin: 5px 0 !important;
    align-self: flex-end !important;
    margin-left: auto !important;
    white-space: pre-wrap !important;
}
.bot-message {
    background-color: #2c3e4c !important;
    color: white !important;
    border-radius: 5px !important;
    padding: 8px !important;
    margin: 5px 0 !important;
    align-self: flex-start !important;
    margin-right: auto !important;
    white-space: pre-wrap !important;
}
.mode-toggle {
    margin-bottom: 10px !important;
}
button {
    background-color: #59788E !important;
    color: white !important;
}
button:hover {
    background-color: #45a049 !important;
}
"""


@spaces.GPU(duration=120)
def generate_response(message, history, memory_mode):
    """Stream a model response for ``message`` (ZeroGPU-decorated generator).

    Args:
        message: The user's latest message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; a
            falsy assistant entry is skipped.
        memory_mode: Truthy selects the ``"smart"`` PMBL mode, falsy ``"full"``.

    Yields:
        The response accumulated so far after each streamed chunk. If
        streaming raises, a single error message is yielded instead.

    After streaming ends (successfully or not), the exchange is passed to
    ``pmbl.save_chat`` and ``pmbl.sleep_mode`` is invoked; a failure in
    ``sleep_mode`` is printed and otherwise ignored.
    """
    # Format the history for the model
    formatted_history = []
    for human, assistant in history:
        formatted_history.append({"role": "user", "content": human})
        if assistant:  # Skip turns that have no assistant reply yet
            formatted_history.append({"role": "PMB", "content": assistant})

    response = ""
    mode = "smart" if memory_mode else "full"

    # Process history in the PMBL module
    history_context = pmbl.process_history(formatted_history, mode, message)

    try:
        # Generate the response in chunks, yielding the running text so the
        # UI can update progressively.
        for chunk in pmbl.generate_streaming_response(message, history_context, mode):
            response += chunk
            yield response
    except Exception as e:
        # Surface the failure to the user rather than breaking the stream.
        error_msg = f"I encountered an error while generating a response: {e}"
        yield error_msg
        response = error_msg

    # Save the conversation to local history only
    pmbl.save_chat(message, response)

    # Process and organize chat history; best-effort, must not fail the turn
    try:
        pmbl.sleep_mode()
    except Exception as e:
        print(f"Error in sleep mode: {e}")


def user_input_fn(message, history, memory_mode):
    """Append the user's message to the chat as a pending turn.

    Args:
        message: Text from the input box.
        history: Current chatbot history (list of ``[user, bot]`` pairs).
        memory_mode: Unused here; accepted so the event wiring can pass the
            same inputs to every handler.

    Returns:
        A ``(textbox_value, history)`` tuple: the textbox is cleared and the
        history gains ``[message, None]``. Blank or whitespace-only messages
        leave the history unchanged so no generation is triggered.
    """
    history = history or []
    if not message or not message.strip():
        # Nothing to send: do not create a pending turn for the bot.
        return "", history
    return "", history + [[message, None]]


def bot_response_fn(history, memory_mode):
    """Fill in the pending bot reply of the last turn, streaming updates.

    Args:
        history: Chatbot history; the last pair is considered pending when
            its bot slot is ``None``.
        memory_mode: Forwarded to ``generate_response``.

    Yields:
        The history list after each update. If there is no pending turn the
        history is yielded once, unchanged.
    """
    if not history or history[-1][1] is not None:
        yield history
        return

    message = history[-1][0]
    history[-1][1] = ""
    try:
        # Pass only the completed turns; the pending one is ``message``.
        for response in generate_response(message, history[:-1], memory_mode):
            history[-1][1] = response
            yield history
    except Exception as e:
        history[-1][1] = f"Error generating response: {e}"
        yield history


# Create the Gradio interface
with gr.Blocks(css=custom_css) as demo:
    # NOTE(review): the heading markup was missing in the reviewed source;
    # <h1> is assumed because custom_css styles h1 - confirm.
    gr.HTML("<h1>Persistent Memory Bot</h1>")

    with gr.Row():
        memory_mode = gr.Checkbox(
            label="Smart Mode (Faster responses but less context memory)",
            value=False,
            elem_classes="mode-toggle",
        )

    chatbot = gr.Chatbot(
        [],
        elem_id="chat-container",
        height=500,
        avatar_images=(None, None),
        bubble_full_width=False,
    )

    with gr.Row():
        msg = gr.Textbox(
            placeholder=(
                "Enter your message, use the switch for faster responses but "
                "less memory. Do not enter sensitive info. Cannot provide "
                "financial/legal advice."
            ),
            show_label=False,
            scale=9,
        )
        submit_btn = gr.Button("Send", scale=1)

    # NOTE(review): wrapper tag assumed; the original markup was missing in
    # the reviewed source - confirm.
    gr.HTML("<p>Processing may take up to 2 minutes for initial setup.</p>")

    # Set up the interaction: pressing Enter and clicking Send behave the
    # same - record the user turn immediately, then stream the bot reply.
    for trigger in (msg.submit, submit_btn.click):
        trigger(
            user_input_fn,
            [msg, chatbot, memory_mode],
            [msg, chatbot],
            queue=False,
        ).then(
            bot_response_fn,
            [chatbot, memory_mode],
            [chatbot],
        )

# Launch the app
demo.queue()
demo.launch()