Spaces:
Running
Running
File size: 4,765 Bytes
bc87bb9 064d8d7 bc87bb9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
import pandas as pd
from typing import List, Dict, Optional
import gradio as gr
from datasets import load_dataset
import numpy as np
class AuthorLeaderboard:
    """
    Load the author leaderboard dataset and shape it for a Gradio Dataframe component.

    On construction the raw table is loaded once (``df_raw``) and a ranked,
    display-ready copy is derived from it (``df_prettified``). ``filter`` then
    serves searches against the display-ready copy.
    """

    # Display columns, in the order they are shown.
    COLUMNS_ORDER: List[str] = [
        'Rank',
        'Author',
        'Total Artifacts',
        'Avg Artifacts per Paper',
        'Total Papers',
        'Total Models',
        'Total Datasets',
        'Total Spaces',
        'Upvotes',
        'Comments',
    ]

    # Datatype string passed to the Dataframe component for each display column.
    DATATYPES: Dict[str, str] = {
        'Rank': 'str',
        'Author': 'markdown',
        'Total Artifacts': 'int',
        'Avg Artifacts per Paper': 'float',
        'Total Papers': 'int',
        'Total Models': 'int',
        'Total Datasets': 'int',
        'Total Spaces': 'int',
        'Upvotes': 'int',
        'Comments': 'int',
    }

    # Medal prefix for the first three ranks.
    EMOTICONS: Dict[int, str] = {
        1: '🥇',
        2: '🥈',
        3: '🥉',
    }

    def __init__(self):
        """
        Initialize the AuthorLeaderboard class by loading and processing the dataset.
        """
        self.df_raw: pd.DataFrame = self.get_df()
        self.df_prettified: pd.DataFrame = self.prettify(self.df_raw)

    @staticmethod
    def get_df() -> pd.DataFrame:
        """
        Load the leaderboard dataset and add the derived/renamed columns.

        Returns:
            pd.DataFrame: The dataset's 'train' split with 'Total Artifacts' and
            'Avg Artifacts per Paper' added and the source columns renamed to
            their display names.
        """
        # Load the dataset from the Hugging Face Hub
        dataset = load_dataset('IAMJB/paper-central-leaderboard', split='train')
        df = dataset.to_pandas()

        # Total artifacts = models + datasets + spaces
        df['Total Artifacts'] = df['num_models'] + df['num_datasets'] + df['num_spaces']

        # Average artifacts per paper, rounded to two decimals.
        # NOTE(review): a row with num_papers == 0 would produce inf/NaN here —
        # confirm the dataset never contains one.
        df['Avg Artifacts per Paper'] = (df['Total Artifacts'] / df['num_papers']).round(2)

        # Rename columns for clarity
        return df.rename(columns={
            'name': 'Author',
            'num_papers': 'Total Papers',
            'num_models': 'Total Models',
            'num_datasets': 'Total Datasets',
            'num_spaces': 'Total Spaces',
            'upvotes': 'Upvotes',
            'num_comments': 'Comments',
        })

    def prettify(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Rank the authors and render the display columns.

        The input frame is not modified.

        Args:
            df (pd.DataFrame): Frame holding the display columns (and optionally
                a 'username' column used to build profile links).

        Returns:
            pd.DataFrame: Rows sorted by 'Total Artifacts' descending, with a
            'Rank' column (medal prefix for ranks 1-3), 'Author' rendered as
            markdown, restricted to COLUMNS_ORDER.
        """
        # A stable sort keeps tied authors in their incoming order, so ranks do
        # not shuffle between runs.
        ranked = df.sort_values(
            by='Total Artifacts', ascending=False, kind='mergesort'
        ).reset_index(drop=True)
        ranked.index += 1  # Start ranks from 1

        ranked['Rank'] = [self._format_rank(position) for position in ranked.index]

        # Built row by row rather than with DataFrame.apply(axis=1): apply returns
        # a DataFrame (not a Series) for an empty frame, which cannot be assigned
        # to a single column.
        ranked['Author'] = [self._create_author_link(row) for _, row in ranked.iterrows()]

        # Select columns to display
        return ranked[self.COLUMNS_ORDER]

    def _format_rank(self, position: int) -> str:
        """
        Render a 1-based rank, prefixed with its medal when one is defined.

        Args:
            position (int): The 1-based rank.

        Returns:
            str: '<medal> <rank>' for ranks listed in EMOTICONS, otherwise '<rank>'.
        """
        if position in self.EMOTICONS:
            return f"{self.EMOTICONS[position]} {position}"
        return f"{position}"

    def _create_author_link(self, row: pd.Series) -> str:
        """
        Create a markdown link for the author's profile.

        Args:
            row (pd.Series): A row from the DataFrame; must contain 'Author' and
                may contain 'username'.

        Returns:
            str: '[Author](https://huggingface.co/<username>)' when the row has a
            non-null, non-empty username; otherwise the plain author name.
        """
        username = row.get('username')
        if pd.isna(username) or not username:
            return row['Author']
        profile_url = f"https://huggingface.co/{username}"
        return f"[{row['Author']}]({profile_url})"

    @staticmethod
    def _search_authors(df: pd.DataFrame, query: Optional[str]) -> pd.DataFrame:
        """
        Return the rows of ``df`` whose 'Author' cell contains ``query``.

        Matching is a case-insensitive literal substring test. It runs against
        the cell text as stored in ``df``, so for prettified frames the markdown
        link text (including the profile URL) is part of what is searched.

        Args:
            df (pd.DataFrame): Frame with an 'Author' column.
            query (Optional[str]): Text to look for; None or '' selects every row.

        Returns:
            pd.DataFrame: A new frame with the matching rows.
        """
        if not query:
            return df.copy()
        # regex=False: the query is user-typed text, so characters such as '(' or
        # '.' must be matched literally instead of being compiled as a pattern.
        # na=False: rows with a missing Author are treated as non-matching.
        matches = df['Author'].str.lower().str.contains(
            query.lower(), regex=False, na=False
        )
        return df[matches]

    def filter(self, author_search_input: Optional[str] = None) -> dict:
        """
        Filter the DataFrame based on the author search input.

        Args:
            author_search_input (Optional[str]): The author name to search for.
                None or '' returns the full leaderboard.

        Returns:
            dict: The update produced by ``gr.update`` for the Gradio Dataframe
            component, carrying the filtered frame and its column datatypes.
        """
        filtered_df: pd.DataFrame = self._search_authors(
            self.df_prettified, author_search_input
        )
        # Get the corresponding data types for the columns
        datatypes: List[str] = [self.DATATYPES.get(col, 'str') for col in filtered_df.columns]
        return gr.update(value=filtered_df, datatype=datatypes)
|