Spaces:
Running
Running
import abc | |
import gradio as gr | |
from gen_table import * | |
from meta_data import * | |
# import pandas as pd | |
# pd.set_option('display.max_colwidth', 0) | |
# head_style = """ | |
# <style> | |
# @media (min-width: 1536px) | |
# { | |
# .gradio-container { | |
# min-width: var(--size-full) !important; | |
# } | |
# } | |
# /* Add checkbox styles */ | |
# .gr-checkbox { | |
# accent-color: rgb(59, 130, 246) !important; /* blue */ | |
# } | |
# .gr-checkbox-group label input[type="checkbox"] { | |
# accent-color: rgb(59, 130, 246) !important; | |
# } | |
# .gr-checkbox-group input[type="checkbox"]:checked { | |
# background-color: rgb(59, 130, 246) !important; | |
# border-color: rgb(59, 130, 246) !important; | |
# } | |
# </style> | |
# """ | |
def _ordered_columns(frame, wanted):
    """Return the columns of ``frame`` that are listed in ``wanted``.

    The result follows the column order of ``frame`` itself, contains no
    duplicates, and silently skips names that ``frame`` does not have, so it
    can always be used to index ``frame`` safely.
    """
    wanted = set(wanted)
    return [col for col in frame.columns if col in wanted]


def _table_component(frame, columns, type_map):
    """Build a read-only Gradio DataFrame showing ``columns`` of ``frame``.

    ``type_map`` maps a column name to the Gradio datatype used to render it.
    """
    return gr.components.DataFrame(
        value=frame[columns],
        type='pandas',
        datatype=[type_map[col] for col in columns],
        interactive=False,
        wrap=True,
        visible=True)


with gr.Blocks(title="Open Agent Leaderboard") as demo:
    # Load the overall results once; they drive the intro text, the overall
    # tab and the dataset filter of the detail tab.
    struct = load_results(OVERALL_MATH_SCORE_FILE)
    timestamp = struct['time']
    EVAL_TIME = format_timestamp(timestamp)
    results = struct['results']
    N_MODEL = len(results)
    N_DATA = len(results['IO'])
    # 'META' sits next to the dataset entries but is not a dataset itself.
    DATASETS = [name for name in results['IO'] if name != 'META']
    print(DATASETS)

    with gr.Tabs() as tabs:
        gr.Markdown(LEADERBORAD_INTRODUCTION.format(EVAL_TIME))

        with gr.Tab(label='🏅 Open Agent Overall Math Leaderboard'):
            gr.Markdown(LEADERBOARD_MD['MATH_MAIN'])
            # These names are specific to the overall tab and are never
            # rebound. `filter_df` looks them up when an event fires, i.e.
            # after the detail tab below has been built, so sharing names
            # with that tab would make this callback read the wrong metadata.
            overall_check_box = BUILD_L1_DF(results, DEFAULT_MATH_BENCH)
            overall_table = generate_table(results, DEFAULT_MATH_BENCH)
            overall_type_map = overall_check_box['type_map']
            overall_type_map['Rank'] = 'number'

            overall_checkbox_group = gr.CheckboxGroup(
                choices=overall_check_box['all'],
                value=overall_check_box['required'],
                label='Evaluation Dimension',
                interactive=True,
            )

            initial_headers = (
                ['Rank']
                + overall_check_box['essential']
                + overall_checkbox_group.value
            )
            # Same ordering rule as `filter_df`, so the first render matches
            # what the user sees after toggling a checkbox.
            available_headers = _ordered_columns(overall_table, initial_headers)
            overall_component = _table_component(
                overall_table, available_headers, overall_type_map)

            def filter_df(fields, *args):
                """Rebuild the overall table for the selected dimensions.

                Args:
                    fields: Column names ticked in the checkbox group.
                    *args: Ignored; accepted so extra inputs can be wired in.

                Returns:
                    A DataFrame component with 'Rank', the essential columns
                    and ``fields``, restricted to columns the table actually
                    has and shown in the table's own column order.
                """
                requested = ['Rank', *overall_check_box['essential'], *fields]
                columns = _ordered_columns(overall_table, requested)
                return _table_component(
                    overall_table, columns, overall_type_map)

            overall_checkbox_group.change(
                fn=filter_df,
                inputs=[overall_checkbox_group],
                outputs=overall_component
            )

        with gr.Tab(label='🏅 Open Agent Detail Math Leaderboard'):
            gr.Markdown(LEADERBOARD_MD['MATH_DETAIL'])
            struct_detail = load_results(DETAIL_MATH_SCORE_FILE)
            timestamp = struct_detail['time']
            EVAL_TIME = format_timestamp(timestamp)
            results_detail = struct_detail['results']

            table, check_box = BUILD_L2_DF(results_detail, DEFAULT_MATH_BENCH)
            type_map = check_box['type_map']
            type_map['Rank'] = 'number'

            checkbox_group = gr.CheckboxGroup(
                choices=check_box['all'],
                value=check_box['required'],
                label='Evaluation Dimension',
                interactive=True,
            )
            headers = _ordered_columns(
                table, ['Rank', *checkbox_group.value])

            with gr.Row():
                algo_name = gr.CheckboxGroup(
                    choices=ALGORITHMS,
                    value=ALGORITHMS,
                    label='Algorithm',
                    interactive=True
                )
                dataset_name = gr.CheckboxGroup(
                    choices=DATASETS,
                    value=DATASETS,
                    label='Datasets',
                    interactive=True
                )
                llm_name = gr.CheckboxGroup(
                    choices=LLM,
                    value=LLM,
                    label='LLM',
                    interactive=True
                )

            data_component = _table_component(table, headers, type_map)

            def filter_df2(fields, algos, datasets, llms):
                """Rebuild the detail table for the current filter selection.

                Args:
                    fields: Column names ticked in 'Evaluation Dimension'.
                    algos: Selected values of the 'Algorithm' column.
                    datasets: Selected values of the 'Dataset' column.
                    llms: Selected values of the 'LLM' column.

                Returns:
                    A DataFrame component holding only the rows that match
                    all three filters, with 'Rank' recomputed per dataset
                    among the remaining rows (highest 'Score' first).
                """
                keep = (
                    table['Algorithm'].isin(algos)
                    & table['Dataset'].isin(datasets)
                    & table['LLM'].isin(llms)
                )
                df = table[keep].copy()

                if 'Score' in df.columns:
                    # Rank inside each dataset. Rows without a score go last
                    # so the cast to int cannot fail on NaN ranks.
                    df['Rank'] = (
                        df.groupby('Dataset')['Score']
                        .rank(method='first', ascending=False,
                              na_option='bottom')
                        .astype(int)
                    )

                columns = _ordered_columns(df, ['Rank', *fields])
                return _table_component(df, columns, type_map)

            # Add a change event to every checkbox group: each of them
            # re-renders the table from the full current selection.
            detail_inputs = [checkbox_group, algo_name, dataset_name, llm_name]
            for control in detail_inputs:
                control.change(
                    fn=filter_df2,
                    inputs=detail_inputs,
                    outputs=data_component
                )

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            gr.Textbox(
                value=CITATION_BUTTON_TEXT, lines=7,
                label="Copy the BibTeX snippet to cite this source",
                elem_id="citation-button",
                show_copy_button=True,
            )


if __name__ == '__main__':
    # Bind to all interfaces so the app is reachable from outside the host.
    demo.launch(server_name='0.0.0.0')