"""Persistent Memory Bot."""

import os
import gradio as gr
import spaces
import json
from modules.pmbl import PMBL

# Module-level PMBL instance, created once at import time and used by
# generate_response() for history processing, streaming generation and saving.
# NOTE(review): the argument looks like a Hugging Face repo id rather than a
# local file path -- confirm what PMBL expects here.
pmbl = PMBL("Qwen/QwQ-32B-GGUF")

# Raw CSS string handed to gr.Blocks(css=...) when the UI is built.
# The original note describes this as a simpler theming approach intended to
# work across Gradio versions.
# Selectors that are wired to components in this file:
#   #chat-container -> elem_id of the gr.Chatbot
#   .mode-toggle    -> elem_classes of the Smart Mode checkbox
# NOTE(review): .user-message / .bot-message are not attached to any component
# in this file; confirm they match class names Gradio actually renders.
custom_css = """
body {
    font-family: Arial, sans-serif;
    margin: 0;
    padding: 20px;
    background: linear-gradient(to bottom right, #222222, #333333);
    color: #f0f8ff;
}

h1 {
    text-align: center;
    margin-bottom: 20px;
    color: #f0f8ff;
    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.5);
}

.gradio-container {
    max-width: 900px !important;
}

#chat-container {
    border: 1px solid #ccc !important;
    border-radius: 5px !important;
    background-color: #1e1e1e !important;
}

.user-message {
    background-color: #59788E !important;
    color: white !important;
    border-radius: 5px !important;
    padding: 8px !important;
    margin: 5px 0 !important;
    align-self: flex-end !important;
    margin-left: auto !important;
    white-space: pre-wrap !important;
}

.bot-message {
    background-color: #2c3e4c !important;
    color: white !important;
    border-radius: 5px !important;
    padding: 8px !important;
    margin: 5px 0 !important;
    align-self: flex-start !important;
    margin-right: auto !important;
    white-space: pre-wrap !important;
}

.mode-toggle {
    margin-bottom: 10px !important;
}

button {
    background-color: #59788E !important;
    color: white !important;
}

button:hover {
    background-color: #45a049 !important;
}
"""

@spaces.GPU(duration=120)
def generate_response(message, history, memory_mode):
    """Stream the model's reply to ``message`` and then persist the exchange.

    Runs under the ``spaces.GPU(duration=120)`` decorator (ZeroGPU).

    Args:
        message: The user's latest message text.
        history: Iterable of ``(user_text, assistant_text)`` pairs for earlier
            turns. A falsy assistant entry is skipped.
        memory_mode: Truthy selects the ``"smart"`` mode, falsy ``"full"``.

    Yields:
        The accumulated response text after every streamed chunk. If
        generation raises, a single error message is yielded instead and that
        message is what gets saved.
    """
    # Flatten the (user, assistant) pairs into role-tagged messages.
    formatted_history = []
    for human, assistant in history:
        formatted_history.append({"role": "user", "content": human})
        if assistant:  # Pending turns have no assistant text yet.
            formatted_history.append({"role": "PMB", "content": assistant})

    mode = "smart" if memory_mode else "full"

    # Let the PMBL module turn the raw history into generation context.
    history_context = pmbl.process_history(formatted_history, mode, message)

    response = ""
    try:
        # Re-yield the growing text so the UI can render it incrementally.
        for chunk in pmbl.generate_streaming_response(message, history_context, mode):
            response += chunk
            yield response
    except Exception as e:
        # Surface the failure to the user instead of aborting the stream.
        response = f"I encountered an error while generating a response: {str(e)}"
        yield response

    # Persisting is best-effort: the reply has already been shown, and an
    # exception escaping here would make the caller replace it with an error.
    try:
        pmbl.save_chat(message, response)
    except Exception as e:
        print(f"Error saving chat: {e}")
        # Nothing new was stored, so skip the history organisation step,
        # exactly as happened when the save failure used to propagate.
        return

    # Process and organize chat history; failures here are only logged.
    try:
        pmbl.sleep_mode()
    except Exception as e:
        print(f"Error in sleep mode: {e}")

def user_input_fn(message, history, memory_mode):
    """Queue the user's message as a pending turn and clear the textbox.

    Args:
        message: Text taken from the input box.
        history: Current chat history as a list of ``[user, assistant]``
            pairs; ``None`` is treated as an empty history.
        memory_mode: Unused here; accepted so the signature matches the
            inputs wired to this callback.

    Returns:
        A ``(textbox_value, history)`` tuple. The textbox value is always
        ``""``. The history is a new list with ``[message, None]`` appended,
        or an unchanged copy when ``message`` is empty or whitespace-only so
        that a blank submission does not trigger a generation.
    """
    # Copy so the caller's list is never mutated (the original used `+`).
    turns = list(history) if history else []

    # Ignore blank submissions instead of sending them to the model.
    if not message or not message.strip():
        return "", turns

    turns.append([message, None])
    return "", turns

def bot_response_fn(history, memory_mode):
    """Fill in the reply for the last pending turn, yielding as it grows.

    A turn is pending when its assistant slot is ``None``. If the history is
    empty (or falsy) or the last turn already has a reply, the history is
    yielded once, unchanged.

    Args:
        history: Chat history as a list of mutable ``[user, assistant]`` pairs.
        memory_mode: Forwarded to ``generate_response``.

    Yields:
        The same ``history`` object after each update to the last turn's
        reply, or after an error message has been written into it.
    """
    awaiting_reply = bool(history) and history[-1][1] is None
    if not awaiting_reply:
        yield history
        return

    pending_turn = history[-1]
    user_text = pending_turn[0]
    # Replace the None placeholder before streaming starts.
    pending_turn[1] = ""

    try:
        # Earlier turns only; the pending one is passed separately as the message.
        for partial_reply in generate_response(user_text, history[:-1], memory_mode):
            pending_turn[1] = partial_reply
            yield history
    except Exception as exc:
        pending_turn[1] = f"Error generating response: {str(exc)}"
        yield history

# Build the UI: title, mode toggle, chat window, input row, then event wiring.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML("<h1>Persistent Memory Bot</h1>")

    with gr.Row():
        memory_mode = gr.Checkbox(
            value=False,
            label="Smart Mode (Faster responses but less context memory)",
            elem_classes="mode-toggle",
        )

    chatbot = gr.Chatbot(
        [],
        height=500,
        elem_id="chat-container",
        bubble_full_width=False,
        avatar_images=(None, None),
    )

    with gr.Row():
        msg = gr.Textbox(
            scale=9,
            show_label=False,
            placeholder="Enter your message, use the switch for faster responses but less memory. Do not enter sensitive info. Cannot provide financial/legal advice.",
        )
        submit_btn = gr.Button("Send", scale=1)

    gr.HTML("<div id='loading-message' style='margin-top: 10px; color: #00ff00; font-style: italic;'>Processing may take up to 2 minutes for initial setup.</div>")

    # Pressing Enter in the textbox and clicking Send run the same chain:
    # first record the user's turn (unqueued), then stream the bot's reply.
    for trigger in (msg.submit, submit_btn.click):
        trigger(
            user_input_fn,
            [msg, chatbot, memory_mode],
            [msg, chatbot],
            queue=False,
        ).then(
            bot_response_fn,
            [chatbot, memory_mode],
            [chatbot],
        )

# Enable queuing, then start the server.
demo.queue()
demo.launch()