Spaces:
Running
Running
import pandas as pd | |
from typing import List, Dict, Optional | |
import gradio as gr | |
from datasets import load_dataset | |
class AuthorLeaderboardContrib:
    """
    A class to manage and process author resource leaderboard data for display in a Gradio Dataframe component.

    The dataset is loaded once at construction time. ``df_raw`` holds the
    loaded data with display-friendly column names, and ``df_prettified``
    holds the ranked, display-ready table that ``filter`` narrows down.
    """

    # Class-level constants defining columns and their data types
    COLUMNS_ORDER: List[str] = [
        'Rank',
        'Author',
        'Entity Type',
        'Total Artifacts',
        'Total Papers',
        'Total Models',
        'Total Datasets',
        'Total Spaces',
        'Likes',
        'Downloads',
    ]

    # Maps each displayed column to the datatype string handed to the Gradio
    # Dataframe component.
    # NOTE(review): Gradio documents 'str', 'number', 'bool', 'date',
    # 'markdown' and 'html' as Dataframe datatypes; confirm that 'int' is
    # accepted by the Gradio version in use before changing these values.
    DATATYPES: Dict[str, str] = {
        'Rank': 'str',
        'Author': 'markdown',
        'Entity Type': 'str',
        'Total Artifacts': 'int',
        'Total Papers': 'int',
        'Total Models': 'int',
        'Total Datasets': 'int',
        'Total Spaces': 'int',
        'Likes': 'int',
        'Downloads': 'int',
    }

    # Medal prefixes shown next to the top three ranks
    EMOTICONS: Dict[int, str] = {
        1: '🥇',
        2: '🥈',
        3: '🥉',
    }

    def __init__(self):
        """
        Initialize the AuthorLeaderboardContrib class by loading and processing the dataset.
        """
        self.df_raw: pd.DataFrame = self.get_df()
        self.df_prettified: pd.DataFrame = self.prettify(self.df_raw)

    @staticmethod
    def get_df() -> pd.DataFrame:
        """
        Load and process the leaderboard dataset.

        Declared as a static method because it uses no instance state; this
        also makes the ``self.get_df()`` call in ``__init__`` valid.

        Returns:
            pd.DataFrame: The dataset with 'unknown' entities removed and
            columns renamed to their display names.
        """
        # Load the dataset from the Hugging Face Hub
        dataset = load_dataset('IAMJB/paper-central-leaderboard-contrib', split='train')
        df = dataset.to_pandas()
        # Exclude entries with 'entity_type' == 'unknown'
        df = df[df['entity_type'] != 'unknown']
        # Rename columns for clarity. rename() returns a new frame, which avoids
        # mutating the filtered slice in place.
        return df.rename(columns={
            'author': 'Author',
            'entity_type': 'Entity Type',
            'total_artifacts': 'Total Artifacts',
            'total_papers': 'Total Papers',
            'total_models': 'Total Models',
            'total_datasets': 'Total Datasets',
            'total_spaces': 'Total Spaces',
            'likes': 'Likes',
            'downloads': 'Downloads',
        })

    def prettify(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Prettify the DataFrame by adding rankings, emoticons, and markdown links.

        The input frame is not modified.

        Args:
            df (pd.DataFrame): The DataFrame to prettify. It must contain every
                column of ``COLUMNS_ORDER`` except 'Rank'.

        Returns:
            pd.DataFrame: The prettified DataFrame, sorted by 'Total Artifacts'
            descending, indexed from 1, with columns in ``COLUMNS_ORDER``.
        """
        # sort_values/reset_index both return new frames, so the caller's
        # DataFrame is left untouched.
        ranked = df.sort_values(by='Total Artifacts', ascending=False).reset_index(drop=True)
        ranked.index += 1  # Start ranks from 1

        # Build both columns element-wise instead of DataFrame.apply(axis=1):
        # apply() returns a DataFrame for an empty input, which cannot be
        # assigned to a single column.
        ranked['Rank'] = pd.Series(
            [self._format_rank(rank) for rank in ranked.index],
            index=ranked.index,
            dtype=object,
        )
        ranked['Author'] = pd.Series(
            [self._format_author_link(author) for author in ranked['Author']],
            index=ranked.index,
            dtype=object,
        )

        # Select columns to display
        return ranked[self.COLUMNS_ORDER]

    def _format_rank(self, rank: int) -> str:
        """Return the rank as text, prefixed with a medal for ranks listed in EMOTICONS."""
        medal = self.EMOTICONS.get(rank)
        return f"{medal} {rank}" if medal else f"{rank}"

    @staticmethod
    def _format_author_link(author: str) -> str:
        """Return a markdown link to the author's Hugging Face profile."""
        profile_url = f"https://huggingface.co/{author}"
        return f"[{author}]({profile_url})"

    def _create_author_link(self, row: pd.Series) -> str:
        """
        Create a markdown link for the author's profile.

        Args:
            row (pd.Series): A row from the DataFrame; only 'Author' is read.

        Returns:
            str: The markdown link for the author.
        """
        return self._format_author_link(row['Author'])

    def _select_rows(
        self,
        author_search_input: Optional[str],
        entity_type_filter: Optional[str],
    ) -> pd.DataFrame:
        """Return a copy of the prettified rows matching the search text and entity type."""
        frame: pd.DataFrame = self.df_prettified

        if author_search_input:
            needle = author_search_input.lower()
            cells = frame['Author'].astype(str)
            # 'Author' cells are markdown links. Match only the visible link
            # text so the profile URL cannot produce hits (e.g. "hugging").
            # Cells that are not links fall back to their full text.
            labels = cells.str.extract(r'^\[(.*?)\]\(', expand=False).fillna(cells)
            # regex=False: the search box holds plain text, so characters such
            # as "(" must not be interpreted as a regular expression.
            mask = labels.str.lower().str.contains(needle, regex=False, na=False)
            frame = frame.loc[mask.astype(bool)]

        # Filter by entity type; None and 'All' both mean "no restriction".
        if entity_type_filter is not None and entity_type_filter != 'All':
            frame = frame.loc[frame['Entity Type'] == entity_type_filter]

        return frame.copy()

    def filter(
        self,
        author_search_input: Optional[str] = None,
        entity_type_filter: Optional[str] = 'All'
    ) -> dict:
        """
        Filter the DataFrame based on the author search input and entity type.

        Args:
            author_search_input (Optional[str]): Case-insensitive plain-text
                fragment of the author name to search for. Empty or None
                disables the search.
            entity_type_filter (Optional[str]): The entity type to filter by
                ('All', 'user', 'org'). None is treated like 'All'.

        Returns:
            The value of ``gr.update(...)`` carrying the filtered DataFrame and
            the per-column datatypes for the Gradio Dataframe component.
        """
        filtered_df = self._select_rows(author_search_input, entity_type_filter)
        # Get the corresponding data types for the columns
        datatypes: List[str] = [self.DATATYPES.get(col, 'str') for col in filtered_df.columns]
        return gr.update(value=filtered_df, datatype=datatypes)