File size: 5,841 Bytes
e7abd9e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import os
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import Dict, Any, List, Tuple
from huggingface_hub import HfApi
from dotenv import load_dotenv

# Get the backend directory path.
# This file lives one level below backend/, so two .parent hops reach it;
# one more reaches the repository root where the .env file is kept.
BACKEND_DIR = Path(__file__).parent.parent
ROOT_DIR = BACKEND_DIR.parent

# Load environment variables from .env file in root directory
load_dotenv(ROOT_DIR / ".env")

# Configure logging: bare messages only (this is a CLI-style report script,
# so timestamps/levels would just add noise to the output).
logging.basicConfig(
    level=logging.INFO,
    format='%(message)s'
)
logger = logging.getLogger(__name__)

# Initialize Hugging Face API.
# Fail fast at import time: every function below needs an authenticated client.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("HF_TOKEN not found in environment variables")
api = HfApi(token=HF_TOKEN)

# Default organization whose datasets (votes/requests) are queried below.
HF_ORGANIZATION = os.getenv('HF_ORGANIZATION', 'open-llm-leaderboard')

def get_last_votes(limit: int = 5) -> List[Dict]:
    """Return the most recent votes from the votes dataset.

    Downloads ``votes_data.jsonl`` from the ``{HF_ORGANIZATION}/votes``
    dataset repo, parses it line by line, and returns the ``limit``
    newest entries sorted by their ``timestamp`` field (descending).

    Args:
        limit: Maximum number of votes to return (default 5).

    Returns:
        A list of vote dicts, newest first; empty list on any error
        (best-effort: failures are logged, never raised).
    """
    try:
        logger.info("\nFetching last votes...")

        # Download and read votes file
        logger.info("Downloading votes file...")
        votes_file = api.hf_hub_download(
            repo_id=f"{HF_ORGANIZATION}/votes",
            filename="votes_data.jsonl",
            repo_type="dataset"
        )

        logger.info("Reading votes file...")
        votes = []
        # JSONL: one vote object per line. Fix: explicit UTF-8 so decoding
        # does not depend on the platform's default locale encoding.
        with open(votes_file, 'r', encoding='utf-8') as f:
            for line in f:
                try:
                    vote = json.loads(line)
                    votes.append(vote)
                except json.JSONDecodeError:
                    # Skip malformed lines rather than failing the whole read.
                    continue

        # Sort by timestamp (ISO strings compare chronologically) and take
        # the last n votes; missing timestamps sort as oldest.
        logger.info("Sorting votes...")
        votes.sort(key=lambda x: x.get('timestamp', ''), reverse=True)
        last_votes = votes[:limit]

        logger.info(f"βœ“ Found {len(last_votes)} recent votes")
        return last_votes

    except Exception as e:
        # Best-effort: log and return an empty list so callers can continue.
        logger.error(f"Error reading votes: {str(e)}")
        return []

def get_last_models(limit: int = 5) -> List[Dict]:
    """Return the most recent model submissions from the requests dataset.

    Walks the commit history of ``{HF_ORGANIZATION}/requests`` (newest
    commits first, as returned by the Hub API), downloads each JSON file
    that was added or modified, and collects the parsed submission dicts
    until ``limit`` models have been gathered.

    Args:
        limit: Maximum number of model submissions to return (default 5).

    Returns:
        A list of submission dicts; empty list on any error
        (best-effort: failures are logged, never raised).
    """
    try:
        logger.info("\nFetching last model submissions...")

        # Get commit history
        logger.info("Getting commit history...")
        commits = list(api.list_repo_commits(
            repo_id=f"{HF_ORGANIZATION}/requests",
            repo_type="dataset"
        ))
        logger.info(f"Found {len(commits)} commits")

        # Track processed files to avoid duplicates (a file touched in
        # several commits is only counted once).
        processed_files = set()
        models = []

        # Process commits until we have enough models
        for i, commit in enumerate(commits):
            logger.info(f"Processing commit {i+1}/{len(commits)} ({commit.created_at})")

            # Look at added/modified files in this commit
            files_to_process = [f for f in (commit.added + commit.modified) if f.endswith('.json')]
            if files_to_process:
                logger.info(f"Found {len(files_to_process)} JSON files in commit")

            for file in files_to_process:
                if file in processed_files:
                    continue

                processed_files.add(file)
                logger.info(f"Downloading {file}...")

                try:
                    # Download and read the file
                    content = api.hf_hub_download(
                        repo_id=f"{HF_ORGANIZATION}/requests",
                        filename=file,
                        repo_type="dataset"
                    )

                    # Fix: explicit UTF-8 — JSON on the Hub is UTF-8, and the
                    # platform default encoding must not affect parsing.
                    with open(content, 'r', encoding='utf-8') as f:
                        model_data = json.load(f)
                        models.append(model_data)
                        logger.info(f"βœ“ Added model {model_data.get('model', 'Unknown')}")

                        if len(models) >= limit:
                            logger.info("Reached desired number of models")
                            break

                except Exception as e:
                    # A single unreadable file should not abort the scan.
                    logger.error(f"Error reading file {file}: {str(e)}")
                    continue

            # Re-check after the inner break so the outer loop stops too.
            if len(models) >= limit:
                break

        logger.info(f"βœ“ Found {len(models)} recent model submissions")
        return models

    except Exception as e:
        # Best-effort: log and return an empty list so callers can continue.
        logger.error(f"Error reading models: {str(e)}")
        return []

def main():
    """Display last activities from the leaderboard"""
    try:
        # Recent votes section.
        logger.info("\n=== Last Votes ===")
        recent_votes = get_last_votes()
        if not recent_votes:
            logger.info("No votes found")
        else:
            for entry in recent_votes:
                logger.info(f"\nModel: {entry.get('model')}")
                logger.info(f"User: {entry.get('username')}")
                logger.info(f"Timestamp: {entry.get('timestamp')}")

        # Recent model submissions section.
        logger.info("\n=== Last Model Submissions ===")
        recent_models = get_last_models()
        if not recent_models:
            logger.info("No models found")
        else:
            # Label/key pairs printed for every submission, in display order.
            detail_fields = (
                ("Submitter", "sender"),
                ("Status", "status"),
                ("Submission Time", "submitted_time"),
                ("Precision", "precision"),
                ("Weight Type", "weight_type"),
            )
            for submission in recent_models:
                logger.info(f"\nModel: {submission.get('model')}")
                for label, key in detail_fields:
                    logger.info(f"{label}: {submission.get(key, 'Unknown')}")

    except Exception as e:
        logger.error(f"Global error: {str(e)}")

# Run the report only when executed as a script, not on import.
if __name__ == "__main__":
    main()