import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import spaces
from monitoring import PerformanceMonitor, measure_time

# Hugging Face Hub identifiers for the base checkpoint and the adapter applied on top of it
BASE_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"  # Base model
ADAPTER_MODEL = "Joash2024/Math-SmolLM2-1.7B"       # Our LoRA adapter

# Shared collector for problem-type counts, response times and success flags
monitor = PerformanceMonitor()


def _load_tokenizer():
    """Load the base model's tokenizer and reuse its EOS token as the pad token."""
    print("Loading tokenizer...")
    tok = AutoTokenizer.from_pretrained(BASE_MODEL)
    tok.pad_token = tok.eos_token
    return tok


def _load_model():
    """Load the base model in float16, attach the LoRA adapter, and switch to eval mode."""
    print("Loading base model...")
    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        use_safetensors=True,
        low_cpu_mem_usage=True,
        torch_dtype=torch.float16,
        device_map="auto",
    )

    print("Loading LoRA adapter...")
    adapted = PeftModel.from_pretrained(
        base,
        ADAPTER_MODEL,
        device_map="auto",
        torch_dtype=torch.float16,
    )
    adapted.eval()
    return adapted


# Loaded once at import time; the tokenizer is loaded before the model, as before.
tokenizer = _load_tokenizer()
model = _load_model()

def format_prompt(problem: str, problem_type: str) -> str:
    """Build the text prompt sent to the model for one problem.

    Args:
        problem: The user's expression or equation, inserted verbatim.
        problem_type: "Derivative" or "Addition" select their own wording;
            any other value (e.g. "Roots") gets the root-finding prompt.

    Returns:
        A prompt made of an instruction line, a blank line, a labelled
        problem line and a lead-in line for the model to continue.
    """
    # (instruction, label for the problem line, lead-in the model continues)
    wording = {
        "Derivative": (
            "Given a mathematical function, find its derivative.",
            "Function",
            "The derivative of this function is:",
        ),
        "Addition": (
            "Solve this addition problem.",
            "Problem",
            "The solution is:",
        ),
    }
    roots_wording = (
        "Find the roots of this equation.",
        "Equation",
        "The roots are:",
    )

    instruction, label, lead_in = wording.get(problem_type, roots_wording)
    return f"{instruction}\n\n{label}: {problem}\n{lead_in}"

@spaces.GPU
@measure_time
def get_model_response(problem: str, problem_type: str) -> str:
    """Generate the model's answer for a single problem.

    Args:
        problem: The user's expression or equation.
        problem_type: Selects the prompt wording (see ``format_prompt``).

    Returns:
        The generated continuation with the prompt removed and surrounding
        whitespace stripped. NOTE(review): ``solve_problem`` unpacks the
        decorated call as ``(response, time_taken)``, so ``measure_time``
        presumably pairs this string with the elapsed time -- confirm in the
        ``monitoring`` module.
    """
    prompt = format_prompt(problem, problem_type)

    # Tokenize and move the tensors to the device the model lives on.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            # Budget the completion itself: `max_length` also counts the
            # prompt tokens, so a long problem would leave no room to answer.
            max_new_tokens=100,
            num_return_sequences=1,
            # Sampling is disabled, so no `temperature` is passed (it only
            # applies when sampling and otherwise just triggers a warning).
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. Drop them by token
    # count rather than slicing the decoded text by len(prompt), which breaks
    # whenever decoding does not reproduce the prompt character-for-character.
    completion_ids = outputs[0][prompt_token_count:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True).strip()

def _render_solution(problem: str, problem_type: str, response: str) -> str:
    """Wrap the model's raw answer in the step-by-step text shown to the user."""
    if problem_type == "Derivative":
        return f"""Generated derivative: {response}

Let's verify this step by step:
1. Starting with f(x) = {problem}
2. Applying differentiation rules
3. We get f'(x) = {response}"""
    if problem_type == "Addition":
        return f"""Solution: {response}

Let's verify this step by step:
1. Starting with: {problem}
2. Adding the numbers
3. We get: {response}"""
    # Any other problem type is presented as a root-finding result.
    return f"""Found roots: {response}

Let's verify this step by step:
1. Starting with equation: {problem}
2. Solving for x
3. Roots are: {response}"""


def _render_statistics(stats) -> str:
    """Format the monitor's statistics mapping as the Markdown metrics panel."""
    header = f"""
### Performance Metrics

#### Response Times
- Average: {stats.get('model_avg_response_time', 0):.2f} seconds

#### Success Rate
- {stats.get('model_success_rate', 0):.1f}%

#### Problem Types Used
"""
    distribution = stats.get('problem_type_distribution', {})
    usage_lines = [
        f"- {ptype}: {percentage:.1f}%\n"
        for ptype, percentage in distribution.items()
    ]
    return header + "".join(usage_lines)


@spaces.GPU
def solve_problem(problem: str, problem_type: str) -> tuple:
    """Solve a math problem and report updated performance metrics.

    Args:
        problem: The user's expression or equation.
        problem_type: "Derivative", "Addition", or anything else (treated as
            a root-finding problem).

    Returns:
        A ``(solution_text, metrics_markdown)`` pair. When ``problem`` is
        empty or contains only whitespace, returns
        ``("Please enter a problem", None)`` without calling the model or
        recording anything.
    """
    # Reject blank input up front; whitespace-only text would otherwise be
    # sent to the model as if it were a real problem.
    if not problem or not problem.strip():
        return "Please enter a problem", None

    monitor.record_problem_type(problem_type)

    # The decorated call yields the answer together with its elapsed time.
    response, time_taken = get_model_response(problem, problem_type)

    monitor.record_response_time("model", time_taken)
    # A response beginning with "Error" is counted as a failure.
    monitor.record_success("model", not response.startswith("Error"))

    solution_text = _render_solution(problem, problem_type, response)
    metrics_markdown = _render_statistics(monitor.get_statistics())
    return solution_text, metrics_markdown

# Build the Gradio UI: input controls, solution box, metrics panel, examples
with gr.Blocks(title="Mathematics Problem Solver") as demo:
    gr.Markdown("# Mathematics Problem Solver")
    gr.Markdown("Using our fine-tuned model to solve mathematical problems")

    # Input controls, stacked in a single column
    with gr.Row(), gr.Column():
        problem_type = gr.Dropdown(
            label="Problem Type",
            choices=["Derivative", "Addition", "Roots"],
            value="Derivative",
        )
        problem_input = gr.Textbox(
            placeholder="Example: x^2 + 3x",
            label="Enter your problem",
        )
        solve_btn = gr.Button("Solve", variant="primary")

    # Solution text
    with gr.Row():
        solution_output = gr.Textbox(lines=6, label="Solution with Steps")

    # Performance metrics, replaced after each solve
    with gr.Row():
        metrics_display = gr.Markdown("### Performance Metrics\n*Solve a problem to see metrics*")

    # The examples widget and the Solve button share the same wiring
    solver_inputs = (problem_input, problem_type)
    solver_outputs = (solution_output, metrics_display)

    # Clickable sample problems; example caching is turned off
    gr.Examples(
        examples=[
            ["x^2 + 3x", "Derivative"],
            ["235 + 567", "Addition"],
            ["x^2 - 4", "Roots"],
            ["\\sin{\\left(x\\right)}", "Derivative"],
            ["e^x", "Derivative"],
            ["\\frac{1}{x}", "Derivative"],
        ],
        inputs=list(solver_inputs),
        outputs=list(solver_outputs),
        fn=solve_problem,
        cache_examples=False,
    )

    # Run the solver when the button is clicked
    solve_btn.click(
        fn=solve_problem,
        inputs=list(solver_inputs),
        outputs=list(solver_outputs),
    )

if __name__ == "__main__":
    demo.launch()