import pandas as pd


def load_data(path="results.csv") -> pd.DataFrame:
    """Load the results table and add a blended I/O cost column.

    Args:
        path: Location of the results CSV. Anything ``pandas.read_csv``
            accepts (a path string or a file-like object). Defaults to
            ``"results.csv"`` in the current working directory.

    Returns:
        A DataFrame containing only the rows that have no missing values,
        with an extra ``"IO Cost"`` column.

    Raises:
        KeyError: If either ``"Input cost per million token"`` or
            ``"Output cost per million token"`` is absent from the file.
    """
    # Rows with a missing value in *any* column are discarded.
    df = pd.read_csv(path).dropna()

    # Single blended price: input and output costs weighted 3:1
    # (0.75 input + 0.25 output).
    df["IO Cost"] = (
        df["Input cost per million token"] * 0.75
        + df["Output cost per million token"] * 0.25
    )
    return df


# Leaderboard categories: display label -> list of score identifiers grouped
# under that label.
# NOTE(review): the identifiers are presumably column names of the results
# table; confirm against the code that consumes this mapping.
CATEGORIES = {
    # Aggregate scores.
    "Overall": [
        "Model Avg",
    ],
    "Overall single turn": [
        "single turn perf",
    ],
    "Overall multi turn": [
        "multi turn perf",
    ],
    # Function-calling scenarios.
    "Single func call": [
        "xlam_single_tool_single_call",
        "xlam_multiple_tool_single_call",
    ],
    "Multiple func call": [
        "xlam_multiple_tool_multiple_call",
        "xlam_single_tool_multiple_call",
        "BFCL_v3_multi_turn_base_multi_func_call",
    ],
    # Edge-case scenarios.
    "Irrelevant query": [
        "BFCL_v3_irrelevance",
    ],
    "Long context": [
        "tau_long_context",
        "BFCL_v3_multi_turn_long_context",
    ],
    "Missing func": [
        "xlam_tool_miss",
        "BFCL_v3_multi_turn_miss_func",
    ],
    "Missing params": [
        "BFCL_v3_multi_turn_miss_param",
    ],
    "Composite": [
        "BFCL_v3_multi_turn_composite",
    ],
}

# Markdown text: a heading, a findings table (Category / Finding /
# Implications) and a closing note. The findings are hard-coded prose, not
# computed from the results data, so they must be revised by hand when the
# underlying numbers change.
# NOTE(review): the leading indentation on every line is part of the string
# value; presumably the renderer dedents it. Confirm before re-indenting.
INSIGHTS = """
                # Key Insights from Agent Leaderboard
                
                | Category | Finding | Implications |
                |----------|---------|--------------|
                | Performance Leader | Gemini-2.0-flash dominates with excellent performance at a fraction of typical costs | Demonstrates that top-tier performance is achievable without premium pricing |
                | Cost vs Performance | Top 3 models span a 200x price difference yet show only 6% performance gap | Challenges traditional pricing assumptions in the market and suggests potential overpricing at the high end |
                | Open Source Models | Qwen-72b matches premium models in safety and context handling at lower cost | Signals growing maturity in open-source models and potential for broader adoption |
                | Safety Features | While irrelevance detection is widely solved, tool miss detection remains a challenge | Highlights uneven development in safety features and areas needing focused improvement |
                | Edge Case Handling | Models still struggle with maintaining context in complex scenarios | Indicates need for architectural improvements in handling sophisticated interactions |
                | Architecture Impact | Models show clear trade-offs between context handling and parallel execution | Suggests need for specialized models or hybrid approaches for different use cases |

                **Note:** Findings based on comprehensive evaluation across multiple tasks and scenarios.
                """

# Markdown text describing the evaluation methodology: overview, the tool
# selection quality metric, and a table of the datasets used.
# Dataset names in the table should stay in sync with CATEGORIES.
# NOTE(review): the leading indentation on every line is part of the string
# value; presumably the renderer dedents it. Confirm before re-indenting.
METHODOLOGY = """
                # Methodology
                            
                ## Overview
                The Agent Leaderboard evaluates language models' ability to effectively use tools and maintain coherent multi-turn conversations. 
                The evaluation focuses on both basic functionality and edge cases that challenge real-world applicability.
                
                ## Tool Selection Quality Metric
                Models are evaluated on their ability to:
                - Correctly identify when tools are needed
                - Select the appropriate tool for the task
                - Handle cases where no suitable tool exists
                - Maintain context across multiple interactions
                
                ## Dataset Structure
                | Type | Samples | Category | Dataset Name | Purpose |
                |------|---------|-----------|--------------|----------|
                | Single-Turn | 100 + 100 | Single Function Call | xlam_single_tool_single_call, xlam_multiple_tool_single_call | Evaluates basic ability to read documentation and make single function calls |
                | | 200 + 50 | Multiple Function Call | xlam_multiple_tool_multiple_call, xlam_single_tool_multiple_call | Tests parallel execution and result aggregation capabilities |
                | | 100 | Irrelevant Query | BFCL_v3_irrelevance | Tests ability to recognize when available tools don't match user needs |
                | | 100 | Long Context | tau_long_context | Assesses handling of extended interactions and complex instructions |
                | Multi-Turn | 50 + 30 | Single Function Call | BFCL_v3_multi_turn_base_single_func_call, toolscs_single_func_call | Tests basic conversational function calling abilities |
                | | 50 | Multiple Function Call | BFCL_v3_multi_turn_base_multi_func_call | Evaluates handling of multiple function calls in conversation |
                | | 100 | Missing Function | BFCL_v3_multi_turn_miss_func | Tests graceful handling of unavailable tools |
                | | 100 | Missing Parameters | BFCL_v3_multi_turn_miss_param | Assesses parameter collection and handling incomplete information |
                | | 100 | Composite | BFCL_v3_multi_turn_composite | Tests overall robustness in complex scenarios |              
                """

# HTML fragment for the page header: a centered "Agent Leaderboard" <h1>,
# a row of four links (Blog, GitHub, Paper, Dataset) that open in a new tab,
# and a "Last updated" line.
# NOTE(review): all four links share the same href (https://galileo.ai),
# which looks like a placeholder; confirm the intended targets.
# NOTE(review): the "Last updated" date is hard-coded and must be edited by
# hand when the results are refreshed.
TITLE = """
    <div style="text-align: center; margin-bottom: 2rem;">
        <h1 style="margin-bottom: 1rem; font-size: 2.5rem; font-weight: bold;">Agent Leaderboard</h1>
        <div style="display: flex; justify-content: center; gap: 2rem; font-size: 1.1rem;">
            <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #4F46E5; font-weight: 500; padding: 0.5rem;">
                Blog&nbsp;📝
            </a>
            <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #4F46E5; font-weight: 500; padding: 0.5rem;">
                GitHub&nbsp;⭐
            </a>
            <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #4F46E5; font-weight: 500; padding: 0.5rem;">
                Paper&nbsp;📄
            </a>
            <a href="https://galileo.ai" target="_blank" style="text-decoration: none; color: #4F46E5; font-weight: 500; padding: 0.5rem;">
                Dataset&nbsp;📊
            </a>
        </div>
        <div style="margin-top: 1rem; color: #6B7280; font-size: 0.9rem;">
            Last updated: January 2025
        </div>
    </div>
"""