# Hugging Face Space: omlab/open-agent-leaderboard (status: Running)
# File size: 7,015 bytes

import abc
import gradio as gr

from gen_table import *
from meta_data import *

# import pandas as pd
# pd.set_option('display.max_colwidth', 0)

# Extra markup passed to gr.Blocks(head=...) below. The CSS rule makes
# `.gradio-container` take the full available width on viewports that are at
# least 1536px wide.
head_style = """
<style>
@media (min-width: 1536px)
{
    .gradio-container {
        min-width: var(--size-full) !important;
    }
}
</style>
"""

# Root of the UI. Everything created inside this `with` is attached to `demo`.
# NOTE: `with` blocks do not open a new scope, so every name assigned below is
# a module-level global.
with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
    # The loaded structure is read through its 'time' and 'results' keys.
    struct = load_results(OVERALL_MATH_SCORE_FILE)
    timestamp = struct['time']
    EVAL_TIME = format_timestamp(timestamp)
    results = struct['results']
    # N_MODEL and N_DATA are not read anywhere else in this file. N_DATA is
    # taken before 'META' is dropped, so it counts that key as well.
    N_MODEL = len(results)
    N_DATA = len(results['IO'])
    # Dataset names are the keys of the 'IO' entry minus the 'META' key.
    # list.remove raises ValueError if 'META' is absent.
    DATASETS = list(results['IO'])
    DATASETS.remove('META')
    print(DATASETS)

    
    
    with gr.Tabs(elem_classes='tab-buttons') as tabs:
        # The introduction is created inside the Tabs container, ahead of the
        # first TabItem, and is formatted with the overall-score timestamp.
        gr.Markdown(LEADERBORAD_INTRODUCTION.format(EVAL_TIME))

        with gr.TabItem('🏅 Open Agent Overall Math Leaderboard', elem_id='math', id=0):
            gr.Markdown(LEADERBOARD_MD['MATH_MAIN'])
            # check_box supplies the column metadata read below through its
            # 'all', 'required', 'essential' and 'type_map' keys.
            check_box = BUILD_L1_DF(results, DEFAULT_MATH_BENCH)
            # Indexed by a list of column names below; presumably a pandas
            # DataFrame (it is displayed with type='pandas').
            table = generate_table(results, DEFAULT_MATH_BENCH)

            # Register a display type for the 'Rank' column alongside the
            # types that came with check_box.
            type_map = check_box['type_map']
            type_map['Rank'] = 'number'

            # Lets the user pick which optional columns are shown.
            checkbox_group = gr.CheckboxGroup(
                choices=check_box['all'],
                value=check_box['required'],
                label='Evaluation Dimension',
                interactive=True,
            )

            # Initial view: rank, the always-visible columns, then the
            # pre-selected optional columns.
            headers = ['Rank'] + check_box['essential'] + checkbox_group.value
            data_component = gr.components.DataFrame(
                value=table[headers],
                type='pandas',
                datatype=[type_map[x] for x in headers],
                interactive=False,
                wrap=True,
                visible=True)

            def _make_overall_filter(tab_table, essential, tab_type_map):
                """Build the column-filter callback for the overall leaderboard tab.

                ``with`` blocks do not open a new scope, so ``table``,
                ``check_box`` and ``type_map`` are module-level names, and the
                detail tab rebinds all three further down in this file. A
                callback that looked them up when the event fires would read
                the detail tab's objects instead of this tab's. Passing them in
                here pins the callback to the objects built above.

                Args:
                    tab_table: Table shown in this tab.
                    essential: Column names that are always displayed.
                    tab_type_map: Mapping from column name to display datatype.

                Returns:
                    The ``filter_df(fields, *args)`` event handler.
                """
                def filter_df(fields, *args):
                    """Return the table restricted to the selected columns.

                    Args:
                        fields: Column names currently ticked in the checkbox
                            group.
                        *args: Ignored; accepted so extra inputs do not break
                            the handler.

                    Returns:
                        A DataFrame component showing 'Rank', the essential
                        columns and ``fields``, in that order.
                    """
                    # Base columns first, then whatever the user selected.
                    columns = ['Rank'] + essential + fields
                    return gr.components.DataFrame(
                        value=tab_table[columns],  # only the selected columns
                        type='pandas',
                        datatype=[tab_type_map[x] for x in columns],
                        interactive=False,
                        wrap=True,
                        visible=True)

                return filter_df

            filter_df = _make_overall_filter(table, check_box['essential'], type_map)

            # The change event only needs the checkbox group itself as input.
            checkbox_group.change(
                fn=filter_df,
                inputs=[checkbox_group],
                outputs=data_component
            )

        # detail math leaderboard
        with gr.TabItem('🏅 Open Agent Detail Math Leaderboard', elem_id='math_detail', id=1):
            gr.Markdown(LEADERBOARD_MD['MATH_DETAIL'])
            # The detail scores come from their own file. This rebinds the
            # module-level `timestamp` and `EVAL_TIME`; nothing later in this
            # file reads EVAL_TIME again.
            struct_detail = load_results(DETAIL_MATH_SCORE_FILE)
            timestamp = struct_detail['time']
            EVAL_TIME = format_timestamp(timestamp)
            results_detail = struct_detail['results']

            # NOTE: `table`, `check_box` and `type_map` are module-level names
            # (a `with` block has no scope of its own) that the overall tab
            # also assigns. Any callback that resolves them at call time sees
            # the values bound here.
            table, check_box = BUILD_L2_DF(results_detail, DEFAULT_MATH_BENCH)
            # table = generate_table_detail(results_detail, DEFAULT_MATH_BENCH)

            # Register a display type for the 'Rank' column.
            type_map = check_box['type_map']
            type_map['Rank'] = 'number'

            # Column selector for the detail table.
            checkbox_group = gr.CheckboxGroup(
                choices=check_box['all'],
                value=check_box['required'],
                label='Evaluation Dimension',
                interactive=True,
            )

            # NOTE(review): the initial view leaves out check_box['essential'],
            # while the filter_df callback of this tab includes it - confirm
            # which column set is intended.
            headers = ['Rank'] + checkbox_group.value
            # Row filters, laid out side by side. Each one starts with every
            # choice selected.
            with gr.Row():

                algo_name = gr.CheckboxGroup(
                        choices=ALGORITHMS,
                        value=ALGORITHMS,
                        label='Algorithm',
                        interactive=True
                    )

                dataset_name = gr.CheckboxGroup(
                        choices=DATASETS,
                        value=DATASETS,
                        label='Datasets',
                        interactive=True
                    )

                llm_name = gr.CheckboxGroup(
                        choices=LLM,
                        value=LLM,
                        label='LLM',
                        interactive=True
                    )
                
            # Read-only table holding the initial, unfiltered view.
            data_component = gr.components.DataFrame(
                value=table[headers],
                type='pandas',
                datatype=[type_map[x] for x in headers],
                interactive=False,
                wrap=True,
                visible=True)
            
            def filter_df(fields, algos, datasets, llms):
                """Rebuild the detail table for the current column and row filters.

                Reads ``table``, ``check_box`` and ``type_map`` from the
                enclosing module scope at call time.

                Args:
                    fields: Column names ticked in the column selector.
                    algos: Selected values for the 'Algorithm' column.
                    datasets: Selected values for the 'Dataset' column.
                    llms: Selected values for the 'LLM' column.

                Returns:
                    A DataFrame component with the rows matching all three
                    filters, showing 'Rank', the essential columns and
                    ``fields``. When a 'Score' column exists, 'Rank' is
                    recomputed per dataset over the remaining rows.
                """
                headers = ['Rank'] + check_box['essential'] + fields

                # Vectorised membership tests instead of a row-wise apply:
                # same rows kept, no temporary column to add and pop, and an
                # empty table no longer breaks the mask assignment.
                keep = (
                    table['Algorithm'].isin(algos)
                    & table['Dataset'].isin(datasets)
                    & table['LLM'].isin(llms)
                )
                df = table[keep].copy()

                # Rank within each dataset by descending score. Ties are
                # broken by row order (method='first'), so ranks are unique
                # and can be stored as integers.
                if 'Score' in df.columns:
                    df['Rank'] = (
                        df.groupby('Dataset')['Score']
                        .rank(method='first', ascending=False)
                        .astype(int)
                    )

                comp = gr.components.DataFrame(
                    value=df[headers],
                    type='pandas',
                    datatype=[type_map[x] for x in headers],
                    interactive=False,
                    wrap=True,
                    visible=True)
                return comp

            # Every checkbox group re-renders the table when it changes. Each
            # handler receives the full selection state, in the order
            # filter_df expects: columns, algorithms, datasets, LLMs.
            _filter_inputs = (checkbox_group, algo_name, dataset_name, llm_name)
            for _trigger in _filter_inputs:
                _trigger.change(
                    fn=filter_df,
                    inputs=list(_filter_inputs),
                    outputs=data_component,
                )


    # Collapsed-by-default citation panel with a copyable BibTeX snippet.
    with gr.Row(), gr.Accordion("📙 Citation", open=False):
        gr.Textbox(
            label="Copy the BibTeX snippet to cite this source",
            value=CITATION_BUTTON_TEXT,
            lines=7,
            show_copy_button=True,
            elem_id="citation-button",
        )

            
# Start the web server only when the file is run as a script.
# server_name='0.0.0.0' makes it listen on all network interfaces.
if __name__ == '__main__':
    demo.launch(server_name='0.0.0.0')