from typing import Dict, List, Optional

import gradio as gr
import pandas as pd
from datasets import load_dataset


class AuthorLeaderboardContrib:
    """
    Manage and process author resource leaderboard data for display in a
    Gradio Dataframe component.

    The dataset is loaded once at construction time. ``df_raw`` holds the
    renamed-but-unformatted data and ``df_prettified`` holds the ranked,
    display-ready table that ``filter`` narrows down on demand.
    """

    # Display columns, in the order they are shown.
    COLUMNS_ORDER: List[str] = [
        'Rank',
        'Author',
        'Entity Type',
        'Total Artifacts',
        'Total Papers',
        'Total Models',
        'Total Datasets',
        'Total Spaces',
        'Likes',
        'Downloads',
    ]

    # Per-column datatype handed to the Gradio Dataframe component.
    # NOTE(review): Gradio documents 'number' (not 'int') for numeric
    # columns -- confirm that 'int' renders/sorts as intended.
    DATATYPES: Dict[str, str] = {
        'Rank': 'str',
        'Author': 'markdown',
        'Entity Type': 'str',
        'Total Artifacts': 'int',
        'Total Papers': 'int',
        'Total Models': 'int',
        'Total Datasets': 'int',
        'Total Spaces': 'int',
        'Likes': 'int',
        'Downloads': 'int',
    }

    # Medal prefix for the first three ranks.
    EMOTICONS: Dict[int, str] = {
        1: '🥇',
        2: '🥈',
        3: '🥉',
    }

    def __init__(self):
        """
        Initialize the leaderboard by loading and processing the dataset.
        """
        self.df_raw: pd.DataFrame = self.get_df()
        self.df_prettified: pd.DataFrame = self.prettify(self.df_raw)

    @staticmethod
    def get_df() -> pd.DataFrame:
        """
        Load and process the leaderboard dataset.

        Loads the 'train' split of 'IAMJB/paper-central-leaderboard-contrib',
        drops rows whose 'entity_type' is 'unknown', and renames the columns
        to their display names.

        Returns:
            pd.DataFrame: The processed DataFrame.
        """
        # Load the dataset from the Hugging Face Hub
        dataset = load_dataset('IAMJB/paper-central-leaderboard-contrib', split='train')
        df = dataset.to_pandas()

        # Exclude entries with 'entity_type' == 'unknown', then rename for
        # clarity. rename() returns a new frame, so the filtered slice is
        # never mutated in place.
        return df[df['entity_type'] != 'unknown'].rename(columns={
            'author': 'Author',
            'entity_type': 'Entity Type',
            'total_artifacts': 'Total Artifacts',
            'total_papers': 'Total Papers',
            'total_models': 'Total Models',
            'total_datasets': 'Total Datasets',
            'total_spaces': 'Total Spaces',
            'likes': 'Likes',
            'downloads': 'Downloads',
        })

    def prettify(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Prettify the DataFrame by adding rankings, emoticons, and markdown links.

        Args:
            df (pd.DataFrame): The DataFrame to prettify. It is not modified.

        Returns:
            pd.DataFrame: A new DataFrame restricted to ``COLUMNS_ORDER``,
            sorted by 'Total Artifacts' descending and indexed from 1.
        """
        df = df.copy()

        # Sort authors by Total Artifacts descending
        df.sort_values(by='Total Artifacts', ascending=False, inplace=True)

        # Reset index so that position in the sorted table is the rank
        df.reset_index(drop=True, inplace=True)
        df.index += 1  # Start ranks from 1

        # Rank column: medal + number for the top 3, plain number otherwise
        df['Rank'] = [self._format_rank(rank) for rank in df.index]

        # Convert 'Author' to markdown with profile links. A column-wise map
        # (rather than a row-wise apply) also works on an empty frame.
        df['Author'] = df['Author'].map(self._format_author_link)

        # Select columns to display
        return df[self.COLUMNS_ORDER]

    def _format_rank(self, rank: int) -> str:
        """Return the display string for a 1-based rank."""
        if rank in self.EMOTICONS:
            return f"{self.EMOTICONS[rank]} {rank}"
        return f"{rank}"

    @staticmethod
    def _format_author_link(author) -> str:
        """Return a markdown link to the author's Hugging Face profile."""
        profile_url = f"https://huggingface.co/{author}"
        return f"[{author}]({profile_url})"

    def _create_author_link(self, row: pd.Series) -> str:
        """
        Create a markdown link for the author's profile.

        Args:
            row (pd.Series): A row from the DataFrame; only 'Author' is read.

        Returns:
            str: The markdown link for the author.
        """
        return self._format_author_link(row['Author'])

    @staticmethod
    def _author_names(author_links: pd.Series) -> pd.Series:
        """
        Recover plain author names from '[name](url)' markdown cells.

        Cells that do not look like a markdown link are returned unchanged.
        """
        as_text = author_links.astype(str)
        names = as_text.str.extract(r'^\[(.*)\]\(', expand=False)
        return names.fillna(as_text)

    def filter(
        self,
        author_search_input: Optional[str] = None,
        entity_type_filter: Optional[str] = 'All'
    ) -> gr.update:
        """
        Filter the DataFrame based on the author search input and entity type.

        Args:
            author_search_input (Optional[str]): Case-insensitive substring to
                look for in the author name. Empty or None disables the search.
            entity_type_filter (Optional[str]): The entity type to filter by
                ('All', 'user', 'org'). 'All', empty, or None disables it.

        Returns:
            gr.Update: An update object for the Gradio Dataframe component.
        """
        filtered_df: pd.DataFrame = self.df_prettified.copy()

        if author_search_input:
            needle = author_search_input.lower()
            # Match against the author name only: the rendered cell also
            # contains the profile URL, which would make queries such as
            # "co" or "http" match every row.
            names = self._author_names(filtered_df['Author']).str.lower()
            # regex=False: user input is a literal substring, so characters
            # like '(' or '+' neither raise nor act as wildcards.
            filtered_df = filtered_df[names.str.contains(needle, regex=False)]

        # Filter by entity type
        if entity_type_filter and entity_type_filter != 'All':
            filtered_df = filtered_df[filtered_df['Entity Type'] == entity_type_filter]

        # Get the corresponding data types for the columns
        datatypes: List[str] = [self.DATATYPES.get(col, 'str') for col in filtered_df.columns]

        return gr.update(value=filtered_df, datatype=datatypes)