|
import streamlit as st |
|
import numpy as np |
|
import pandas as pd |
|
from datasets import load_dataset |
|
|
|
st.set_page_config(layout="wide")

# Top row: page title on the left (2 parts wide), source picker on the
# right (3 parts wide).
col1, col2 = st.columns([2, 3])

# Dataset pages the leaderboard can be built from; the first entry is the
# one pre-selected in the picker.
sources = [
    "https://huggingface.co/datasets/cfahlgren1/hub-stats",
    "https://huggingface.co/datasets/maxiw/hf-posts",
]

col1.header("HuggingFace 🤗 Posts leaderboard")
selected_source = col2.selectbox("Data Source:", options=sources, index=0)
|
|
|
# Load the posts table from the selected source and normalize it so that the
# rest of the script can rely on three columns regardless of origin:
#   publishedAt - datetime, Name - author's full name, username - author handle.
if selected_source == sources[0]:
    # Broad catch on purpose: any failure while reading or normalizing the
    # primary source is reported in the UI and triggers the fallback below
    # instead of crashing the page.
    try:
        df = pd.read_parquet("hf://datasets/cfahlgren1/hub-stats/posts.parquet")

        # Coerce to datetime here as well, so both sources hand the same
        # dtype to the date slider (a no-op if the column already is one).
        df["publishedAt"] = pd.to_datetime(df["publishedAt"])
        df["Name"] = df["fullname"]
        df["username"] = df["name"]
    except Exception as exp:
        st.error(
            f"\nERROR>> in loading {selected_source}\n\n>> {exp}",
            icon="🚨",
        )
        # Switch to the second source; the check below then loads it.
        selected_source = sources[1]
        st.info(
            '\nThis can be solved by "Space Restart"\n\n'
            "Switching Sources for now...\n\n"
            f"New Source: {selected_source}",
            icon="ℹ️",
        )

# Deliberately a second `if` (not `elif`): it must also run when the branch
# above failed and rewrote `selected_source`.
if selected_source == sources[1]:
    df = pd.read_json("hf://datasets/maxiw/hf-posts/posts.jsonl", lines=True)

    df["publishedAt"] = pd.to_datetime(df["publishedAt"])

    # In this source each `author` entry is indexed by "fullname" and "name".
    df["Name"] = df["author"].apply(lambda author: author["fullname"])
    df["username"] = df["author"].apply(lambda author: author["name"])
|
|
|
|
|
# Columns the leaderboard can be sorted by. "totalReactions" and
# "Num of posts" are not read from the source here; they are assigned to the
# frame further down the script.
metrics = ["totalUniqueImpressions", "totalReactions", "numComments", "Num of posts"]

# Earliest and latest publication timestamps, as plain Python datetimes,
# used as the bounds of the date slider.
published = df["publishedAt"]
min_date = published.min().to_pydatetime()
max_date = published.max().to_pydatetime()

# Controls row: wide date slider on the left, sort selector on the right.
col1, col2 = st.columns([3, 1])

# The slider starts with the full available range selected.
date_range = col1.slider(
    "Select Date Range",
    min_value=min_date,
    max_value=max_date,
    value=(min_date, max_date),
    format="DD/MMM/YYYY",
)

selected_metric = col2.selectbox("Sort by:", options=metrics, index=0)
|
|
|
|
|
|
|
# Keep only the posts published inside the selected window. `date_range` is
# unpacked as the (left, right) bounds of Series.between.
mask = df["publishedAt"].between(*date_range)
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the loaded data (avoids SettingWithCopyWarning).
df = df[mask].copy()

# Each `reactions` entry is iterated as a collection of items indexed by
# "count"; the per-post total is the sum of those counts.
df["totalReactions"] = df["reactions"].apply(
    lambda reactions: sum(reaction["count"] for reaction in reactions)
)
# Constant 1 per row so that summing per author yields the post count.
df["Num of posts"] = 1

# Missing metric values count as zero; all metrics are aggregated as ints.
df[metrics] = df[metrics].fillna(0).astype(int)

# One row per author with summed metrics, highest `selected_metric` first.
data = (
    df.groupby(["username", "Name"])[metrics]
    .sum()
    .sort_values(selected_metric, ascending=False)
    .reset_index()
)
# 1-based rank as the index.
data.index = np.arange(1, len(data) + 1)
data.index.name = "Rank"

# Render the numbers with thousands separators. Series.map is used per
# column because DataFrame.applymap is deprecated in recent pandas.
for metric in metrics:
    data[metric] = data[metric].map("{:,}".format)
|
|
|
|
|
def make_clickable(val):
    """Return an HTML anchor that links ``val`` to its huggingface.co page.

    The value is percent-encoded where it is used as the URL path segment and
    HTML-escaped where it is used as the visible link text, so characters
    such as ``<``, ``>``, ``"`` or ``/`` cannot break out of the attribute or
    the element. For values made only of letters, digits, ``-``, ``_``, ``.``
    and ``~`` the output is unchanged by the encoding.

    Args:
        val: The account name to link; converted with ``str()`` first.

    Returns:
        A string of the form
        ``<a target="_blank" href="https://huggingface.co/NAME">NAME</a>``.
    """
    # Function-scope imports keep this block self-contained.
    from html import escape
    from urllib.parse import quote

    name = str(val)
    path = quote(name, safe="")
    label = escape(name)
    return f'<a target="_blank" href="https://huggingface.co/{path}">{label}</a>'
|
|
|
|
|
# Build the HTML table. Usernames become profile links; the free-text "Name"
# column is HTML-escaped because the table is injected into the page with
# unsafe_allow_html=True and would otherwise be rendered as raw markup.
# The escape is limited to "Name" so the anchor produced for "username" is
# left intact.
df_styled = data.style.format({"username": make_clickable}).format(
    escape="html", subset=["Name"]
)

# NOTE(review): `escape` and `index` are not named parameters of
# Styler.to_html; they are kept as in the original call and appear to be
# forwarded to the template as extra keyword arguments. Confirm before
# relying on them to hide the index.
table_html = df_styled.to_html(escape=False, index=False)
st.write(
    f"""<center>{table_html}</center>""",
    unsafe_allow_html=True,
)
|
|