import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import spaces

from monitoring import PerformanceMonitor, measure_time

# Model configurations
BASE_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"  # Base model
ADAPTER_MODEL = "Joash2024/Math-SmolLM2-1.7B"  # Our LoRA adapter

# Initialize performance monitor
monitor = PerformanceMonitor()

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token

print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="auto",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    use_safetensors=True
)

print("Loading fine-tuned model...")
finetuned_model = PeftModel.from_pretrained(
    base_model,
    ADAPTER_MODEL,
    torch_dtype=torch.float16,
    device_map="auto"
)

# Set models to eval mode
base_model.eval()
finetuned_model.eval()


def format_prompt(problem: str, problem_type: str) -> str:
    """Format input prompt for the model"""
    if problem_type == "Derivative":
        return f"""Given a mathematical function, find its derivative.

Function: {problem}
The derivative of this function is:"""
    elif problem_type == "Addition":
        return f"""Solve this addition problem.

Problem: {problem}
The solution is:"""
    else:  # Roots
        return f"""Find the roots of this equation.

Equation: {problem}
The roots are:"""


@spaces.GPU
@measure_time
def get_model_response(problem: str, problem_type: str, model) -> str:
    """Generate response from model"""
    # Format prompt
    prompt = format_prompt(problem, problem_type)

    # Tokenize
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=100,
            num_return_sequences=1,
            temperature=0.1,
            do_sample=False,  # Deterministic generation
            pad_token_id=tokenizer.eos_token_id
        )

    # Decode and extract response
    generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
    response = generated[len(prompt):].strip()
    return response


@spaces.GPU
def solve_problem(problem: str, problem_type: str) -> tuple:
    """Solve math problem with both models"""
    if not problem:
        return "Please enter a problem", "Please enter a problem", None

    # Record problem type
    monitor.record_problem_type(problem_type)

    # Get responses from both models with timing.
    # Note: PeftModel injects the LoRA adapter into base_model's modules in place,
    # so the adapter is disabled while generating the "base" response to get a
    # true base-model answer for comparison.
    with finetuned_model.disable_adapter():
        base_response, base_time = get_model_response(problem, problem_type, base_model)
    finetuned_response, finetuned_time = get_model_response(problem, problem_type, finetuned_model)

    # Format outputs with steps
    if problem_type == "Derivative":
        base_output = f"""Generated derivative: {base_response}

Let's verify this step by step:
1. Starting with f(x) = {problem}
2. Applying differentiation rules
3. We get f'(x) = {base_response}"""

        finetuned_output = f"""Generated derivative: {finetuned_response}

Let's verify this step by step:
1. Starting with f(x) = {problem}
2. Applying differentiation rules
3. We get f'(x) = {finetuned_response}"""
    elif problem_type == "Addition":
        base_output = f"""Solution: {base_response}

Let's verify this step by step:
1. Starting with: {problem}
2. Adding the numbers
3. We get: {base_response}"""

        finetuned_output = f"""Solution: {finetuned_response}

Let's verify this step by step:
1. Starting with: {problem}
2. Adding the numbers
3. We get: {finetuned_response}"""
    else:  # Roots
        base_output = f"""Found roots: {base_response}

Let's verify this step by step:
1. Starting with equation: {problem}
2. Solving for x
3. Roots are: {base_response}"""

        finetuned_output = f"""Found roots: {finetuned_response}

Let's verify this step by step:
1. Starting with equation: {problem}
2. Solving for x
3. Roots are: {finetuned_response}"""

    # Record metrics
    monitor.record_response_time("base", base_time)
    monitor.record_response_time("finetuned", finetuned_time)
    monitor.record_success("base", not base_response.startswith("Error"))
    monitor.record_success("finetuned", not finetuned_response.startswith("Error"))

    # Get updated statistics
    stats = monitor.get_statistics()

    # Format statistics for display
    stats_display = f"""
### Performance Metrics

#### Response Times (seconds)
- Base Model: {stats.get('base_avg_response_time', 0):.2f} avg
- Fine-tuned Model: {stats.get('finetuned_avg_response_time', 0):.2f} avg

#### Success Rates
- Base Model: {stats.get('base_success_rate', 0):.1f}%
- Fine-tuned Model: {stats.get('finetuned_success_rate', 0):.1f}%

#### Problem Types Used
"""
    for ptype, percentage in stats.get('problem_type_distribution', {}).items():
        stats_display += f"- {ptype}: {percentage:.1f}%\n"

    return base_output, finetuned_output, stats_display


# Create Gradio interface
with gr.Blocks(title="Mathematics Problem Solver") as demo:
    gr.Markdown("# Mathematics Problem Solver")
    gr.Markdown("Compare solutions between base and fine-tuned models")

    with gr.Row():
        with gr.Column():
            problem_type = gr.Dropdown(
                choices=["Derivative", "Addition", "Roots"],
                value="Derivative",
                label="Problem Type"
            )
            problem_input = gr.Textbox(
                label="Enter your problem",
                placeholder="Example: x^2 + 3x"
            )
            solve_btn = gr.Button("Solve", variant="primary")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Base Model")
            base_output = gr.Textbox(label="Base Model Solution", lines=6)
        with gr.Column():
            gr.Markdown("### Fine-tuned Model")
            finetuned_output = gr.Textbox(label="Fine-tuned Model Solution", lines=6)

    # Performance metrics display
    with gr.Row():
        metrics_display = gr.Markdown("### Performance Metrics\n*Solve a problem to see metrics*")

    # Example problems
    gr.Examples(
        examples=[
            ["x^2 + 3x", "Derivative"],
            ["235 + 567", "Addition"],
            ["x^2 - 4", "Roots"],
            ["\\sin{\\left(x\\right)}", "Derivative"],
            ["e^x", "Derivative"],
            ["\\frac{1}{x}", "Derivative"]
        ],
        inputs=[problem_input, problem_type],
        outputs=[base_output, finetuned_output, metrics_display],
        fn=solve_problem,
        cache_examples=False  # Disable caching
    )

    # Connect the interface
    solve_btn.click(
        fn=solve_problem,
        inputs=[problem_input, problem_type],
        outputs=[base_output, finetuned_output, metrics_display]
    )

if __name__ == "__main__":
    demo.launch()