# bench1k / app.py
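# Streamlit app for Bench1k: a browsable database of 1,327+ LLM benchmark and eval
# datasets from Hugging Face, with keyword search, sidebar filters, and a
# full-database CSV export.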
import streamlit as st
import pandas as pd
import numpy as np
from html import escape
st.set_page_config(layout="wide")
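# Column rendering for st.dataframe: download/like counts get an emoji suffix,
# and the URL columns render as clickable "Open" links instead of raw URLs.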
column_config = {
    "Downloads": st.column_config.NumberColumn(
        "Downloads", format="%d 📥"
    ),
    "Likes": st.column_config.NumberColumn(
        "Likes", format="%d ❤️"
    ),
    "Hugging Face URL": st.column_config.LinkColumn("Hugging Face URL", display_text="Open"),
    "Arxiv URL": st.column_config.LinkColumn("Arxiv URL", display_text="Open"),
    "PapersWithCode URL": st.column_config.LinkColumn("PapersWithCode URL", display_text="Open"),
}
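# Load the benchmark CSV once and cache it across reruns. Dates and counts are
# coerced with errors='coerce' (bad values become NaT/NaN instead of raising),
# and the long free-text columns ('Card Data', 'Model Card README') are dropped.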
@st.cache_data
def load_data():
    file_path = 'HuggingFaceBenchmarkDatasetsWithTags - Copy of HuggingFaceBenchmarkDatasetsWithTags (1).csv'
    data = pd.read_csv(file_path, na_values=['NA', ''])
    data['Created At'] = pd.to_datetime(data['Created At'], errors='coerce')
    data['Last Modified'] = pd.to_datetime(data['Last Modified'], errors='coerce')
    numeric_cols = ['Downloads', 'Likes', 'Total Examples', 'Dataset Size (bytes)']
    for col in numeric_cols:
        data[col] = pd.to_numeric(data[col], errors='coerce')
    data.replace("", np.nan, inplace=True)
    data = data.drop(columns=['Card Data', 'Model Card README'])
    # Sort by downloads so the default view matches the subheader ("sorted by # of downloads").
    data = data.sort_values('Downloads', ascending=False)
    return data
def escape_html(val):
    # Escape HTML special characters in string cells; non-strings pass through unchanged.
    return escape(val) if isinstance(val, str) else val
df = load_data()
st.title('Bench1k: LLM Benchmarks & Evals Database')
st.subheader('Explore 1,327+ benchmarks. By default, sorted by # of downloads.')
st.write("Use the sidebar to apply filters.")
search_query = st.text_input("Search benchmarks by keyword")
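# Sidebar filters: multiselects for the categorical columns (Task IDs, Task
# Categories, Licenses) and range sliders for the numeric ones. An empty
# multiselect applies no restriction; the sliders default to the full observed range.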
task_ids = df['Task IDs'].dropna().unique().tolist()
selected_task_id = st.sidebar.multiselect('Filter by Task IDs', task_ids)
task_categories = df['Task Categories'].dropna().unique().tolist()
selected_task_category = st.sidebar.multiselect('Filter by Task Categories', task_categories)
licenses = df['Licenses'].dropna().unique().tolist()
selected_license = st.sidebar.multiselect('Filter by License', licenses)
min_likes, max_likes = int(df['Likes'].min(skipna=True)), int(df['Likes'].max(skipna=True))
selected_likes = st.sidebar.slider('Filter by Likes', min_likes, max_likes, (min_likes, max_likes))
min_size, max_size = df['Dataset Size (bytes)'].min(skipna=True), df['Dataset Size (bytes)'].max(skipna=True)
selected_size = st.sidebar.slider('Filter by Dataset Size (bytes)', min_size, max_size, (min_size, max_size))
min_examples, max_examples = df['Total Examples'].min(skipna=True), df['Total Examples'].max(skipna=True)
selected_examples = st.sidebar.slider('Filter by Total Examples', min_examples, max_examples, (min_examples, max_examples))
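# Apply the filters in sequence. Task IDs / Task Categories cells hold
# comma-separated tags, so the masks below check membership per tag, e.g. a
# selection of "text-classification" matches a cell like
# "question-answering,text-classification". Note that the range filters use
# Series.between, which also drops rows where that column is NaN.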
filtered_df = df
if search_query:
    search_cols = df.select_dtypes(include=[object]).columns
    filtered_df = filtered_df[
        filtered_df[search_cols]
        .apply(lambda x: x.str.contains(search_query, case=False, na=False))
        .any(axis=1)
    ]
if selected_task_id:
    mask_task_id = filtered_df['Task IDs'].apply(
        lambda x: any(task_id.strip() in str(x).split(',') for task_id in selected_task_id)
    )
    filtered_df = filtered_df[mask_task_id]
if selected_task_category:
    mask_task_category = filtered_df['Task Categories'].apply(
        lambda x: any(category.strip() in str(x).split(',') for category in selected_task_category)
    )
    filtered_df = filtered_df[mask_task_category]
if selected_license:
    filtered_df = filtered_df[filtered_df['Licenses'].isin(selected_license)]
if selected_likes:
    filtered_df = filtered_df[filtered_df['Likes'].between(selected_likes[0], selected_likes[1])]
if selected_size:
    filtered_df = filtered_df[filtered_df['Dataset Size (bytes)'].between(selected_size[0], selected_size[1])]
if selected_examples:
    filtered_df = filtered_df[filtered_df['Total Examples'].between(selected_examples[0], selected_examples[1])]
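# Sanitize the tag columns before display: anything that is not alphanumeric,
# a space, a hyphen, or an underscore is stripped so no HTML-sensitive
# characters reach the rendered table.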
def clean_html_sensitive_content(val):
    if isinstance(val, str):
        val = ''.join(e for e in val if e.isalnum() or e in [' ', '-', '_'])
    return val

filtered_df = filtered_df.copy()  # avoid SettingWithCopyWarning when assigning the cleaned columns below
filtered_df['Task IDs'] = filtered_df['Task IDs'].apply(clean_html_sensitive_content)
filtered_df['Task Categories'] = filtered_df['Task Categories'].apply(clean_html_sensitive_content)
st.dataframe(filtered_df, column_config=column_config, hide_index=True)
st.sidebar.info("Use the filters above to explore different aspects of the benchmark datasets.")
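# Offer the full (unfiltered) database as a CSV download. The conversion is
# cached with st.cache_data so the file is only re-encoded when the data changes.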
@st.cache_data
def convert_df(df):
    return df.to_csv().encode('utf-8')
csv = convert_df(df)
st.download_button(
    label="Download database as CSV",
    data=csv,
    file_name='bench1k_database_full.csv',
    mime='text/csv',
)
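# Run locally with:
#   streamlit run app.py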