|
import html

import gradio as gr
import pandas as pd
|
|
|
|
|
# Columns selected for the leaderboard table, in the order they are displayed.
# Kept as a list because it is used directly for DataFrame column selection.
UGI_COLS = [
    '#P',
    'Model',
    'UGI π',
    'Willingness π',
    'Unruly',
    'Internet',
    'CrimeStats',
    'Stories/Jokes',
    'PolContro',
]
|
|
|
|
|
def _model_cell(name, link):
    """Return the 'Model' cell for one row: an HTML link when *link* is set, else *name* unchanged."""
    if pd.isna(link):
        return name
    # Escape both values so characters such as &, <, > or " in the CSV cannot
    # break the attribute or inject markup into the rendered cell.
    href = html.escape(str(link), quote=True)
    label = html.escape(str(name))
    return f'<a href="{href}" target="_blank" style="color: blue; text-decoration: none;">{label}</a>'


def load_leaderboard_data(csv_file_path):
    """Load the leaderboard CSV and turn model names into clickable links.

    The CSV must contain a 'Model' and a 'Link' column. Each row whose 'Link'
    is present gets its 'Model' value replaced by an ``<a>`` element pointing
    at that link (with the name and URL HTML-escaped); rows without a link keep
    their 'Model' value as-is. The 'Link' column is then dropped.

    Args:
        csv_file_path: Path (or anything ``pandas.read_csv`` accepts) of the
            leaderboard CSV.

    Returns:
        The loaded DataFrame without the 'Link' column. If anything goes wrong
        while reading or transforming the file, the error is printed and an
        empty DataFrame is returned that has the ``UGI_COLS`` columns plus
        'Params', so that code filtering on 'Params' still works on it.
    """
    try:
        df = pd.read_csv(csv_file_path)

        df['Model'] = [
            _model_cell(name, link)
            for name, link in zip(df['Model'], df['Link'])
        ]

        df.drop(columns=['Link'], inplace=True)
        return df
    except Exception as e:
        # Best-effort load: report the problem and fall back to an empty table.
        print(f"Error loading CSV file: {e}")
        fallback_columns = list(dict.fromkeys([*UGI_COLS, 'Params']))
        return pd.DataFrame(columns=fallback_columns)
|
|
|
|
|
# Size buckets in billions of parameters:
# label -> (inclusive lower bound, exclusive upper bound); None = open-ended.
_PARAM_BUCKETS = {
    '~1.5': (None, 2.5),
    '~3': (2.5, 6),
    '~7': (6, 9.5),
    '~13': (9.5, 16),
    '~20': (16, 28),
    '~34': (28, 40),
    '~50': (40, 60),
    '~70+': (60, None),
}


def _bucket_mask(params: pd.Series, low, high) -> pd.Series:
    """Return a boolean mask of the rows whose value lies in [low, high)."""
    mask = params.notna()
    if low is not None:
        mask = mask & (params >= low)
    if high is not None:
        mask = mask & (params < high)
    return mask


def update_table(df: pd.DataFrame, query: str, param_ranges: dict) -> pd.DataFrame:
    """Filter the leaderboard by model size and by a free-text query.

    Size filter (column 'Params'):
      * nothing checked  -> only rows whose 'Params' is missing are kept;
      * some checked     -> rows falling in any checked bucket are kept;
      * every entry checked -> as above, plus rows whose 'Params' is missing.
    Labels that are not known buckets are ignored.

    Text filter: when *query* is non-empty, a row is kept if the lower-cased
    query occurs in the text of any of its cells. Matching is done against the
    cell values themselves rather than pandas' pretty-printed row text, which
    also contains the column labels (so a query such as "model" matched every
    row) and is subject to display formatting.

    Args:
        df: Full leaderboard frame; must contain 'Params' and every column
            listed in ``UGI_COLS``.
        query: Search text; empty or None disables the text filter.
        param_ranges: Mapping of bucket label (e.g. '~7') to a truthy value
            when that bucket is checked.

    Returns:
        The matching rows, restricted to the ``UGI_COLS`` columns.
    """
    params = df['Params']
    checked = [label for label, is_on in param_ranges.items() if is_on]

    if not checked:
        keep = params.isna()
    else:
        keep = pd.Series(False, index=df.index)
        for label in checked:
            bounds = _PARAM_BUCKETS.get(label)
            if bounds is None:
                continue  # unknown label: contributes no rows
            keep = keep | _bucket_mask(params, *bounds)
        if all(param_ranges.values()):
            # Everything checked means "show all", including unknown sizes.
            keep = keep | params.isna()

    filtered_df = df[keep]

    if query:
        needle = query.lower()
        hits = pd.Series(False, index=filtered_df.index)
        # Column-wise matching keeps the mask boolean even when no rows remain.
        for _, cells in filtered_df.items():
            text = cells.astype(str).str.lower()
            hits = hits | text.str.contains(needle, regex=False).to_numpy()
        filtered_df = filtered_df[hits.to_numpy()]

    return filtered_df[UGI_COLS]
|
|
|
|
|
# Labels offered by the size filter, in display order.
_SIZE_CHOICES = ['~1.5', '~3', '~7', '~13', '~20', '~34', '~50', '~70+']

GraInter = gr.Blocks()

with GraInter:
    gr.HTML("""<h1 align="center">UGI Leaderboard</h1>""")
    # The text is kept flush-left so that no paragraph is indented inside the string.
    gr.Markdown("""
UGI: Uncensored General Intelligence. The average of 5 different subjects that LLMs are commonly steered away from. The leaderboard is made from roughly 60 questions overall, measuring both "willingness to answer" and "accuracy" in fact-based controversial questions.

Willingness: A more narrow, 10-point score, solely measuring the LLM's willingness to answer controversial questions.

Unruly: Knowledge of activities that are generally frowned upon.

Internet: Knowledge of various internet information, from professional to deviant.

CrimeStats: Knowledge of crime statistics which are uncomfortable to talk about.

Stories/Jokes: Ability to write offensive stories and jokes.

PolContro: Knowledge of politically/socially controversial information.
""")
    with gr.Column():
        with gr.Row():
            search_bar = gr.Textbox(placeholder=" π Search for a model...", show_label=False, elem_id="search-bar")
        with gr.Row():
            filter_columns_size = gr.CheckboxGroup(
                label="Model sizes (in billions of parameters)",
                choices=list(_SIZE_CHOICES),
                value=[],
                interactive=True,
                elem_id="filter-columns-size",
            )

    leaderboard_df = load_leaderboard_data("ugi-leaderboard-data.csv")

    # The 'Model' column holds HTML links; every other column is shown as text.
    datatypes = ['html' if col == 'Model' else 'str' for col in UGI_COLS]

    leaderboard_table = gr.Dataframe(
        value=leaderboard_df[UGI_COLS],
        datatype=datatypes,
        interactive=False,
        visible=True,
        elem_classes="text-sm"
    )

    inputs = [
        search_bar,
        filter_columns_size
    ]

    outputs = leaderboard_table

    def _refresh_table(query, selected_sizes):
        """Rebuild the table from the current search text and ticked size labels.

        The checkbox group delivers the list of ticked labels, so each known
        label is mapped to whether it appears in that list. Zipping the full
        label list with the ticked labels would instead mark the first
        ``len(selected_sizes)`` labels as checked regardless of which boxes
        were actually ticked.
        """
        ticked = set(selected_sizes or [])
        checked_by_size = {size: size in ticked for size in _SIZE_CHOICES}
        return update_table(leaderboard_df, query, checked_by_size)

    # NOTE(review): the table starts with every row while no size box is ticked,
    # but update_table keeps only rows with missing 'Params' when nothing is
    # checked, so the first search narrows the table accordingly - confirm
    # this is the intended behaviour.
    search_bar.change(
        fn=_refresh_table,
        inputs=inputs,
        outputs=outputs
    )

    filter_columns_size.change(
        fn=_refresh_table,
        inputs=inputs,
        outputs=outputs
    )


GraInter.launch()