Spaces:
Running
Running
File size: 4,839 Bytes
064d8d7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
import pandas as pd
from typing import List, Dict, Optional
import gradio as gr
from datasets import load_dataset
class AuthorLeaderboardContrib:
    """
    Manage the author contribution leaderboard shown in a Gradio Dataframe.

    The raw leaderboard is loaded from the Hugging Face Hub when the object is
    constructed, a display-ready ("prettified") copy is derived from it, and
    ``filter`` narrows that copy according to the search inputs.
    """

    # Columns of the displayed table, in display order.
    COLUMNS_ORDER: List[str] = [
        'Rank',
        'Author',
        'Entity Type',
        'Total Artifacts',
        'Total Papers',
        'Total Models',
        'Total Datasets',
        'Total Spaces',
        'Likes',
        'Downloads',
    ]

    # Datatype string handed to the Gradio Dataframe for each column.
    # NOTE(review): Gradio's Dataframe docs list "number" (not "int") as the
    # numeric datatype -- confirm these values render and sort as intended.
    DATATYPES: Dict[str, str] = {
        'Rank': 'str',
        'Author': 'markdown',
        'Entity Type': 'str',
        'Total Artifacts': 'int',
        'Total Papers': 'int',
        'Total Models': 'int',
        'Total Datasets': 'int',
        'Total Spaces': 'int',
        'Likes': 'int',
        'Downloads': 'int',
    }

    # Medal prefixes for the first three ranks.
    EMOTICONS: Dict[int, str] = {
        1: '🥇',
        2: '🥈',
        3: '🥉',
    }

    # Captures the link text of the "[author](url)" markdown built by
    # ``_author_markdown`` so searches can target the name, not the URL.
    _AUTHOR_LINK_PATTERN: str = r'^\[(.*?)\]\('

    def __init__(self):
        """
        Load the leaderboard dataset and build its display-ready version.
        """
        self.df_raw: pd.DataFrame = self.get_df()
        self.df_prettified: pd.DataFrame = self.prettify(self.df_raw)

    @staticmethod
    def get_df() -> pd.DataFrame:
        """
        Load the leaderboard dataset and normalise its column names.

        Returns:
            pd.DataFrame: The leaderboard without rows whose ``entity_type``
            is ``'unknown'``, with columns renamed to their display names.
        """
        # Load the dataset from the Hugging Face Hub
        dataset = load_dataset('IAMJB/paper-central-leaderboard-contrib', split='train')
        df = dataset.to_pandas()

        # Exclude entries with 'entity_type' == 'unknown'
        known = df[df['entity_type'] != 'unknown']

        # Non-inplace rename: returns a fresh frame instead of mutating the
        # filtered slice.
        return known.rename(columns={
            'author': 'Author',
            'entity_type': 'Entity Type',
            'total_artifacts': 'Total Artifacts',
            'total_papers': 'Total Papers',
            'total_models': 'Total Models',
            'total_datasets': 'Total Datasets',
            'total_spaces': 'Total Spaces',
            'likes': 'Likes',
            'downloads': 'Downloads',
        })

    def prettify(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Build the display version of the leaderboard.

        Rows are sorted by 'Total Artifacts' (descending), given a 1-based
        rank (with a medal prefix for the top three), and the 'Author' column
        is turned into a markdown profile link. The input frame is left
        unmodified. An empty input yields an empty frame with the display
        columns.

        Args:
            df (pd.DataFrame): The DataFrame to prettify.

        Returns:
            pd.DataFrame: The prettified DataFrame, restricted to
            ``COLUMNS_ORDER`` and indexed from 1.
        """
        # sort_values/reset_index return new frames, so ``df`` is untouched.
        ranked = df.sort_values(by='Total Artifacts', ascending=False).reset_index(drop=True)

        # Ranks (and the index) start from 1.
        ranked.index = pd.RangeIndex(start=1, stop=len(ranked) + 1)
        ranked['Rank'] = [self._format_rank(position) for position in ranked.index]

        # Element-wise map (rather than a row-wise apply) keeps this a Series
        # even when the frame is empty.
        ranked['Author'] = ranked['Author'].map(self._author_markdown)

        # Select columns to display
        return ranked[self.COLUMNS_ORDER]

    def _format_rank(self, position: int) -> str:
        """
        Render a rank as text, prefixed with a medal when one is defined.

        Args:
            position (int): The 1-based rank.

        Returns:
            str: ``"<medal> <rank>"`` for ranks listed in ``EMOTICONS``,
            otherwise just the rank.
        """
        medal = self.EMOTICONS.get(position)
        return f"{medal} {position}" if medal else f"{position}"

    @staticmethod
    def _author_markdown(author) -> str:
        """
        Build the markdown profile link for a single author name.

        Args:
            author: The author's name as stored in the 'Author' column.

        Returns:
            str: The markdown link for the author.
        """
        profile_url = f"https://huggingface.co/{author}"
        return f"[{author}]({profile_url})"

    def _create_author_link(self, row: pd.Series) -> str:
        """
        Create a markdown link for the author's profile.

        Args:
            row (pd.Series): A row from the DataFrame; its 'Author' entry is read.

        Returns:
            str: The markdown link for the author.
        """
        return self._author_markdown(row['Author'])

    def _select_rows(
        self,
        author_search_input: Optional[str] = None,
        entity_type_filter: Optional[str] = 'All'
    ) -> pd.DataFrame:
        """
        Return the prettified rows matching the search text and entity type.

        Args:
            author_search_input (Optional[str]): Case-insensitive substring to
                look for in the author name. Empty/None disables the search.
            entity_type_filter (Optional[str]): Entity type to keep. 'All',
                empty, or None keeps every entity type.

        Returns:
            pd.DataFrame: A filtered copy of ``self.df_prettified``.
        """
        rows: pd.DataFrame = self.df_prettified.copy()

        needle = (author_search_input or '').strip().lower()
        if needle and not rows.empty:
            # Search the author name only: matching the whole markdown link
            # would let fragments of the profile URL match every row.
            names = rows['Author'].str.extract(self._AUTHOR_LINK_PATTERN, expand=False)
            names = names.fillna(rows['Author'])
            # regex=False: the input is user text, not a pattern, so characters
            # such as "(" or "[" must not raise or act as metacharacters.
            matches = names.str.lower().str.contains(needle, regex=False, na=False)
            rows = rows[matches]

        # Filter by entity type
        if entity_type_filter and entity_type_filter != 'All':
            rows = rows[rows['Entity Type'] == entity_type_filter]

        return rows

    def filter(
        self,
        author_search_input: Optional[str] = None,
        entity_type_filter: Optional[str] = 'All'
    ) -> dict:
        """
        Filter the DataFrame based on the author search input and entity type.

        Args:
            author_search_input (Optional[str]): The author name to search for
                (case-insensitive, plain-text substring match).
            entity_type_filter (Optional[str]): The entity type to filter by
                ('All', 'user', 'org'). None is treated like 'All'.

        Returns:
            The value produced by ``gr.update`` for the Gradio Dataframe
            component, carrying the filtered rows and per-column datatypes.
        """
        filtered_df: pd.DataFrame = self._select_rows(author_search_input, entity_type_filter)

        # Get the corresponding data types for the columns
        datatypes: List[str] = [self.DATATYPES.get(col, 'str') for col in filtered_df.columns]
        return gr.update(value=filtered_df, datatype=datatypes)
|