File size: 3,830 Bytes
5267683
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255ad58
 
5267683
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255ad58
 
5267683
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from collections import defaultdict
from datetime import datetime, timezone
from typing import Dict, List

# Constants
# Rating handed out the first time a model is looked up in the ratings table
# (it is the default value of the defaultdict built in get_leaderboard).
DEFAULT_ELO = 1200  # Starting ELO for new models
# Multiplier applied to (actual score - expected score) on every rating
# update; a larger value makes each vote move ratings further.
K_FACTOR = 32  # Standard chess K-factor

def get_leaderboard(
    model_data: Dict,
    voting_data: List,
    show_preliminary=True,
    min_votes: int = 300,
) -> List[Dict]:
    """Build the Elo leaderboard rows from a sequence of pairwise votes.

    Votes are replayed in the order given. Every model starts at
    ``DEFAULT_ELO`` and each usable vote moves both participants by
    ``K_FACTOR * (actual - expected)``.

    Args:
        model_data: Mapping of model name to a metadata mapping that must
            provide the ``"organization"`` and ``"license"`` keys.
        voting_data: Iterable of vote mappings carrying ``"model_a"``,
            ``"model_b"`` and ``"winner"``. A winner of ``"A"`` or ``"B"``
            credits that side; any other truthy value is scored as a tie.
            Votes with a missing/empty field, or naming a model that is not
            in ``model_data``, are ignored.
        show_preliminary: When false, models with fewer than ``min_votes``
            counted votes are left out of the result.
        min_votes: Vote threshold used when ``show_preliminary`` is false.

    Returns:
        A list of row dicts with the keys ``"Model"``, ``"ELO Score"``,
        ``"95% CI"``, ``"# Votes"``, ``"Organization"`` and ``"License"``,
        ordered from the highest rating to the lowest.

    Raises:
        KeyError: If a model's metadata lacks ``"organization"`` or
            ``"license"``.
    """
    ratings = defaultdict(lambda: DEFAULT_ELO)
    matches = defaultdict(int)

    for vote in voting_data:
        # Only the field extraction/validation can fail on malformed input
        # (e.g. a vote that is not a mapping, or an unhashable model name),
        # so that is the only part guarded.
        try:
            model_a = vote.get("model_a")
            model_b = vote.get("model_b")
            winner = vote.get("winner")
            usable = (
                all((model_a, model_b, winner))
                and model_a in model_data
                and model_b in model_data
            )
        except (AttributeError, TypeError) as e:
            print(f"Error processing vote: {e}")
            continue

        if not usable:
            continue

        matches[model_a] += 1
        matches[model_b] += 1

        # Read both ratings before writing either one so the update uses the
        # pre-vote values for both sides.
        elo_a = ratings[model_a]
        elo_b = ratings[model_b]

        # Expected scores from the logistic Elo curve.
        expected_a = 1 / (1 + 10 ** ((elo_b - elo_a) / 400))
        expected_b = 1 - expected_a

        # Actual scores: win = 1, loss = 0, anything else = tie (0.5 each).
        score_a = 1 if winner == "A" else 0 if winner == "B" else 0.5
        score_b = 1 - score_a

        ratings[model_a] += K_FACTOR * (score_a - expected_a)
        ratings[model_b] += K_FACTOR * (score_b - expected_b)

    ranked = []
    for model, info in model_data.items():
        votes = matches[model]
        if not show_preliminary and votes < min_votes:
            continue

        elo = ratings[model]
        # Heuristic interval width that shrinks with the square root of the
        # vote count; models without votes report 0.
        ci = 1.96 * (400 / (votes + 1) ** 0.5) if votes > 0 else 0
        row = {
            "Model": model,
            "ELO Score": f"{int(elo)}",
            "95% CI": f"±{int(ci)}",
            "# Votes": votes,
            "Organization": info["organization"],
            "License": info["license"],
        }
        ranked.append((elo, row))

    # Order by the unrounded rating: sorting on the truncated display string
    # would make models less than one point apart fall back to input order.
    ranked.sort(key=lambda pair: pair[0], reverse=True)

    return [row for _, row in ranked]

def get_leaderboard_stats(model_data: Dict, voting_data: List) -> str:
    """Render a Markdown summary of the leaderboard.

    Reports the number of entries in ``model_data``, the number of entries
    in ``voting_data`` and the current UTC time rounded down to the hour.
    """
    # Only hour precision is reported: the minutes are a literal "00" in the
    # format string, so the timestamp needs no explicit truncation.
    last_updated = datetime.now(timezone.utc).strftime("%B %d, %Y at %H:00 UTC")
    total_models, total_votes = len(model_data), len(voting_data)

    return f"""
### Leaderboard Stats
- **Total Models**: {total_models}
- **Total Votes**: {total_votes}
- **Last Updated**: {last_updated}
"""

def calculate_elo_change(rating_a: float, rating_b: float, winner: str) -> tuple[float, float]:
    """Return the rating deltas ``(change_a, change_b)`` for one match.

    ``winner`` equal to ``"A"`` or ``"B"`` credits that side with a full
    point; any other value is scored as a tie (half a point each). The
    deltas are ``K_FACTOR`` times the gap between actual and expected score.
    """
    # Expected score of A from the logistic Elo curve; B gets the remainder.
    rating_gap = rating_b - rating_a
    expected_a = 1 / (1 + 10 ** (rating_gap / 400))
    expected_b = 1 - expected_a

    # Actual scores: 1/0 for a decisive result, 0.5/0.5 otherwise.
    score_a = 1 if winner == "A" else 0 if winner == "B" else 0.5
    score_b = 1 - score_a

    return (
        K_FACTOR * (score_a - expected_a),
        K_FACTOR * (score_b - expected_b),
    )

def get_model_rankings(leaderboard: List[Dict]) -> Dict[str, int]:
    """Map each model name to its 1-based position in ``leaderboard``.

    Positions follow the list order as given. If a model name appears more
    than once, the position of its last occurrence is kept.
    """
    rankings: Dict[str, int] = {}
    for position, row in enumerate(leaderboard, start=1):
        rankings[row["Model"]] = position
    return rankings