Spaces:
Running
Running
from collections import defaultdict | |
from datetime import datetime, timezone | |
from typing import Dict, List | |
# Elo configuration shared by the rating helpers in this module.

# Rating every model starts from before any vote has been applied.
DEFAULT_ELO = 1200

# Maximum rating swing per match (the standard chess K-factor).
K_FACTOR = 32
def get_leaderboard(
    model_data: Dict,
    voting_data: List,
    show_preliminary=True,
    *,
    min_votes: int = 300,
    default_elo: "float | None" = None,
    k_factor: "float | None" = None,
):
    """Build leaderboard rows by replaying pairwise votes as sequential Elo updates.

    Args:
        model_data: Mapping of model name -> metadata mapping. Each metadata
            mapping must provide the keys "organization" and "license".
        voting_data: Votes to replay, in order. Each vote is expected to be a
            mapping with the keys "model_a", "model_b" and "winner"
            ("A", "B", or any other truthy value, which is scored as a tie).
            NOTE(review): the previous docstring said these come from MongoDB;
            nothing in this function depends on that.
        show_preliminary: When False, models with fewer than ``min_votes``
            votes are left out of the result.
        min_votes: Vote threshold used when ``show_preliminary`` is False.
        default_elo: Starting rating for every model; ``None`` means the
            module-level ``DEFAULT_ELO``.
        k_factor: Elo K-factor; ``None`` means the module-level ``K_FACTOR``.

    Returns:
        A list of row dicts with the keys "Model", "ELO Score", "95% CI",
        "# Votes", "Organization" and "License", ordered from highest to
        lowest rating. Models with exactly equal ratings keep their
        ``model_data`` order.
    """
    start_rating = DEFAULT_ELO if default_elo is None else default_elo
    k = K_FACTOR if k_factor is None else k_factor

    ratings = defaultdict(lambda: start_rating)
    matches = defaultdict(int)

    for vote in voting_data:
        try:
            model_a = vote.get("model_a")
            model_b = vote.get("model_b")
            winner = vote.get("winner")

            # Ignore incomplete votes and votes for models that are no longer listed.
            if not (model_a and model_b and winner):
                continue
            if model_a not in model_data or model_b not in model_data:
                continue

            elo_a = ratings[model_a]
            elo_b = ratings[model_b]

            # Expected scores from the logistic Elo curve (400-point scale).
            expected_a = 1 / (1 + 10 ** ((elo_b - elo_a) / 400))
            expected_b = 1 - expected_a

            # Anything other than "A" or "B" counts as a tie.
            score_a = 1 if winner == "A" else 0 if winner == "B" else 0.5
            score_b = 1 - score_a

            # Commit counts and ratings together, only after every value
            # above has been computed successfully.
            matches[model_a] += 1
            matches[model_b] += 1
            ratings[model_a] += k * (score_a - expected_a)
            ratings[model_b] += k * (score_b - expected_b)
        except Exception as e:
            # Best effort: one malformed vote must not abort the whole replay.
            print(f"Error processing vote: {e}")
            continue

    ranked = []
    for model in model_data:
        votes = matches[model]

        # Hide models that have not yet reached the vote threshold.
        if not show_preliminary and votes < min_votes:
            continue

        elo = ratings[model]
        # Heuristic interval width: 1.96 * 400 / sqrt(votes + 1); 0 with no votes.
        ci = 1.96 * (400 / (votes + 1) ** 0.5) if votes > 0 else 0
        row = {
            "Model": model,
            "ELO Score": f"{int(elo)}",
            "95% CI": f"±{int(ci)}",
            "# Votes": votes,
            "Organization": model_data[model]["organization"],
            "License": model_data[model]["license"],
        }
        ranked.append((elo, row))

    # Sort on the untruncated rating: sorting on the displayed integer string
    # would order models whose ratings differ only in the fractional part by
    # insertion order instead of by rating.
    ranked.sort(key=lambda pair: pair[0], reverse=True)
    return [row for _, row in ranked]
def get_leaderboard_stats(model_data: Dict, voting_data: List) -> str:
    """Return a Markdown snippet summarising the leaderboard.

    The snippet reports the number of entries in ``model_data``, the number
    of entries in ``voting_data``, and the current UTC time shown to the hour.
    """
    model_count = len(model_data)
    vote_count = len(voting_data)

    # Only hour precision is displayed: the format string hard-codes ":00".
    hour_stamp = datetime.now(timezone.utc).strftime("%B %d, %Y at %H:00 UTC")

    lines = [
        "",
        "### Leaderboard Stats",
        f"- **Total Models**: {model_count}",
        f"- **Total Votes**: {vote_count}",
        f"- **Last Updated**: {hour_stamp}",
        "",
    ]
    return "\n".join(lines)
def calculate_elo_change(
    rating_a: float,
    rating_b: float,
    winner: str,
    k_factor: "float | None" = None,
) -> tuple[float, float]:
    """Return the Elo rating deltas for a single match between A and B.

    Args:
        rating_a: Current rating of player/model A.
        rating_b: Current rating of player/model B.
        winner: "A" if A won, "B" if B won; any other value is scored as a tie.
        k_factor: Maximum rating swing per match; ``None`` means the
            module-level ``K_FACTOR``.

    Returns:
        ``(change_a, change_b)``: the amounts to add to each rating. The two
        changes always cancel out (``change_a == -change_b``).
    """
    k = K_FACTOR if k_factor is None else k_factor

    # Expected scores from the logistic Elo curve (400-point scale).
    expected_a = 1 / (1 + 10 ** ((rating_b - rating_a) / 400))
    expected_b = 1 - expected_a

    if winner == "A":
        score_a, score_b = 1, 0
    elif winner == "B":
        score_a, score_b = 0, 1
    else:
        # Ties (and any unrecognised value) split the point.
        score_a, score_b = 0.5, 0.5

    return k * (score_a - expected_a), k * (score_b - expected_b)
def get_model_rankings(leaderboard: List[Dict]) -> Dict[str, int]:
    """Map each model name to its 1-based position in ``leaderboard``.

    Positions follow the order of the list as given. If a model name appears
    more than once, its last position wins.
    """
    rankings = {}
    for position, row in enumerate(leaderboard, start=1):
        rankings[row["Model"]] = position
    return rankings