# paper-central / df /author_leaderboard.py
# (web file-viewer residue kept for reference: "IAMJB's picture", "leaderboards",
#  "064d8d7", "raw", "history blame", "4.77 kB")
import pandas as pd
from typing import List, Dict, Optional
import gradio as gr
from datasets import load_dataset
import numpy as np
class AuthorLeaderboard:
    """
    Manage and process author leaderboard data for display in a Gradio Dataframe component.

    On construction the leaderboard dataset is loaded once (``df_raw``) and a
    ranked, display-ready copy is derived from it (``df_prettified``).
    ``filter`` answers author searches against the display-ready copy.
    """

    # Columns shown in the Dataframe component, in display order.
    COLUMNS_ORDER: List[str] = [
        'Rank',
        'Author',
        'Total Artifacts',
        'Avg Artifacts per Paper',
        'Total Papers',
        'Total Models',
        'Total Datasets',
        'Total Spaces',
        'Upvotes',
        'Comments',
    ]

    # Datatype handed to the Gradio Dataframe for each displayed column.
    # NOTE(review): Gradio documents "str", "number", "bool", "date",
    # "markdown" and "html" as Dataframe datatypes; 'int' and 'float' are not
    # in that list -- confirm they render as intended or switch to 'number'.
    DATATYPES: Dict[str, str] = {
        'Rank': 'str',
        'Author': 'markdown',
        'Total Artifacts': 'int',
        'Avg Artifacts per Paper': 'float',
        'Total Papers': 'int',
        'Total Models': 'int',
        'Total Datasets': 'int',
        'Total Spaces': 'int',
        'Upvotes': 'int',
        'Comments': 'int',
    }

    # Medal prefix for the top three ranks.
    EMOTICONS: Dict[int, str] = {
        1: '🥇',
        2: '🥈',
        3: '🥉',
    }

    def __init__(self):
        """
        Initialize the AuthorLeaderboard class by loading and processing the dataset.
        """
        self.df_raw: pd.DataFrame = self.get_df()
        self.df_prettified: pd.DataFrame = self.prettify(self.df_raw)

    @staticmethod
    def get_df() -> pd.DataFrame:
        """
        Load and process the leaderboard dataset.

        Returns:
            pd.DataFrame: The processed DataFrame (see ``_process_raw``).
        """
        # Load the dataset from the Hugging Face Hub
        dataset = load_dataset('IAMJB/paper-central-leaderboard', split='train')
        return AuthorLeaderboard._process_raw(dataset.to_pandas())

    @staticmethod
    def _process_raw(df: pd.DataFrame) -> pd.DataFrame:
        """
        Add the derived artifact columns and rename raw columns to display names.

        Args:
            df (pd.DataFrame): Raw data with the columns ``name``,
                ``num_papers``, ``num_models``, ``num_datasets``,
                ``num_spaces``, ``upvotes`` and ``num_comments``.

        Returns:
            pd.DataFrame: A new DataFrame; the input is not modified.
        """
        total_artifacts = df['num_models'] + df['num_datasets'] + df['num_spaces']

        # Mask zero paper counts before dividing so the result is NaN instead
        # of inf/NaN from a division by zero, then report those rows as 0.0.
        papers = df['num_papers'].where(df['num_papers'] != 0)
        avg_per_paper = (total_artifacts / papers).fillna(0.0).round(2)

        df = df.assign(**{
            'Total Artifacts': total_artifacts,
            'Avg Artifacts per Paper': avg_per_paper,
        })
        # Rename columns for clarity
        return df.rename(columns={
            'name': 'Author',
            'num_papers': 'Total Papers',
            'num_models': 'Total Models',
            'num_datasets': 'Total Datasets',
            'num_spaces': 'Total Spaces',
            'upvotes': 'Upvotes',
            'num_comments': 'Comments',
        })

    def prettify(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Prettify the DataFrame by adding rankings, emoticons, and markdown links.

        Args:
            df (pd.DataFrame): The DataFrame to prettify.

        Returns:
            pd.DataFrame: The prettified DataFrame, restricted to
            ``COLUMNS_ORDER`` and indexed by rank (starting at 1).
        """
        # Sort authors by Total Artifacts descending. A stable sort keeps tied
        # authors in their input order, so ranks are reproducible across runs.
        df = df.sort_values(by='Total Artifacts', ascending=False, kind='mergesort')

        # Reset index to get ranks, starting from 1
        df = df.reset_index(drop=True)
        df.index += 1

        # Add Rank column, with emoticons for the top 3 ranks
        df['Rank'] = [
            f"{self.EMOTICONS[rank]} {rank}" if rank in self.EMOTICONS else f"{rank}"
            for rank in df.index
        ]

        # Convert 'Author' to markdown with profile links if 'username' is
        # available. Built row by row rather than with DataFrame.apply(axis=1),
        # which returns a DataFrame instead of a Series for an empty frame.
        df['Author'] = [self._create_author_link(row) for _, row in df.iterrows()]

        # Select columns to display
        return df[self.COLUMNS_ORDER]

    def _create_author_link(self, row: pd.Series) -> str:
        """
        Create a markdown link for the author's profile.

        Args:
            row (pd.Series): A row from the DataFrame.

        Returns:
            str: The markdown link for the author, or the plain author name
            when the row has no usable ``username``.
        """
        username = row.get('username')
        if pd.notna(username) and username:
            profile_url = f"https://huggingface.co/{username}"
            return f"[{row['Author']}]({profile_url})"
        return row['Author']

    def _filter_authors(self, author_search_input: Optional[str] = None) -> pd.DataFrame:
        """
        Return the prettified rows whose 'Author' cell contains the search text.

        The match is case-insensitive and literal. It runs against the
        prettified cell, so for linked authors the profile URL is searched too.

        Args:
            author_search_input (Optional[str]): The author name to search for.
                ``None`` or an empty string returns every row.

        Returns:
            pd.DataFrame: A filtered copy of ``df_prettified``.
        """
        filtered_df: pd.DataFrame = self.df_prettified.copy()
        if author_search_input:
            search_string = author_search_input.lower()
            # regex=False: the text comes from a user, so characters such as
            # '(' or '.' must match literally instead of raising re.error.
            # na=False: rows with a missing author simply do not match.
            matches = filtered_df['Author'].str.lower().str.contains(
                search_string, regex=False, na=False
            )
            filtered_df = filtered_df[matches]
        return filtered_df

    def filter(self, author_search_input: Optional[str] = None) -> dict:
        """
        Filter the DataFrame based on the author search input.

        Args:
            author_search_input (Optional[str]): The author name to search for.

        Returns:
            dict: The value returned by ``gr.update`` for the Gradio Dataframe
            component. ``gr.update`` is a function, not a type, so it is not
            used as the annotation.
        """
        filtered_df = self._filter_authors(author_search_input)
        # Get the corresponding data types for the columns
        datatypes: List[str] = [self.DATATYPES.get(col, 'str') for col in filtered_df.columns]
        return gr.update(value=filtered_df, datatype=datatypes)