Spaces:

omlab
/

open-agent-leaderboard

Running

App Files Community

qq-hzlh committed 8 days ago

Commit

e90e797

1 Parent(s): 8d8059b

fix filter bug

Browse files

Files changed (2) hide show

app.py +47 -43
gen_table.py +44 -36

app.py CHANGED Viewed

@@ -7,32 +7,32 @@ from meta_data import *
 # import pandas as pd
 # pd.set_option('display.max_colwidth', 0)
-head_style = """
-<style>
-@media (min-width: 1536px)
-{
-    .gradio-container {
-        min-width: var(--size-full) !important;
-    }
-}
-/* 添加复选框样式 */
-.gr-checkbox {
-    accent-color: rgb(59, 130, 246) !important;  /* 蓝色 */
-}
-.gr-checkbox-group label input[type="checkbox"] {
-    accent-color: rgb(59, 130, 246) !important;
-}
-.gr-checkbox-group input[type="checkbox"]:checked {
-    background-color: rgb(59, 130, 246) !important;
-    border-color: rgb(59, 130, 246) !important;
-}
-</style>
-"""
-with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
     struct = load_results(OVERALL_MATH_SCORE_FILE)
     timestamp = struct['time']
     EVAL_TIME = format_timestamp(timestamp)
@@ -51,7 +51,7 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
         with gr.Tab(label='🏅 Open Agent Overall Math Leaderboard'):
             gr.Markdown(LEADERBOARD_MD['MATH_MAIN'])
             check_box = BUILD_L1_DF(results, DEFAULT_MATH_BENCH)
-            table = generate_table(results, DEFAULT_MATH_BENCH)
             type_map = check_box['type_map']
             type_map['Rank'] = 'number'
@@ -63,12 +63,11 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
                 interactive=True,
             )
-            # 修改这里：确保初始显示的列都存在于表格中
             initial_headers = ['Rank'] + check_box['essential'] + checkbox_group.value
-            available_headers = [h for h in initial_headers if h in table.columns]
             data_component = gr.components.DataFrame(
-                value=table[available_headers],  # 只使用可用的列
                 type='pandas',
                 datatype=[type_map[x] for x in available_headers],
                 interactive=False,
@@ -77,17 +76,21 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
             def filter_df(fields, *args):
                 headers = ['Rank'] + check_box['essential'] + fields
-                df = table.copy()
                 # 确保所有请求的列都存在
-                available_headers = [h for h in headers if h in df.columns]
                 # 如果没有可用的列，返回一个带有基本列的空DataFrame
                 if not available_headers:
                     available_headers = ['Rank'] + check_box['essential']
                 comp = gr.components.DataFrame(
-                    value=df[available_headers],
                     type='pandas',
                     datatype=[type_map[x] for x in available_headers],
                     interactive=False,
@@ -95,7 +98,6 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
                     visible=True)
                 return comp
-            # checkbox_group的change事件只需要传入checkbox_group
             checkbox_group.change(
                 fn=filter_df,
                 inputs=[checkbox_group],
@@ -111,7 +113,6 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
             table, check_box = BUILD_L2_DF(results_detail, DEFAULT_MATH_BENCH)
             # table = generate_table_detail(results_detail, DEFAULT_MATH_BENCH)
             type_map = check_box['type_map']
             type_map['Rank'] = 'number'
@@ -154,8 +155,8 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
                 wrap=True,
                 visible=True)
-            def filter_df(fields, algos, datasets, llms):
-                headers = ['Rank'] + check_box['essential'] + fields
                 df = table.copy()
                 # 过滤数据
@@ -176,6 +177,9 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
                     # 确保排名为整数
                     df['Rank'] = df['Rank'].astype(int)
                 comp = gr.components.DataFrame(
                     value=df[headers],
                     type='pandas',
@@ -187,25 +191,25 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
             # 为所有复选框组添加change事件
             checkbox_group.change(
-                fn=filter_df,
                 inputs=[checkbox_group, algo_name, dataset_name, llm_name],
                 outputs=data_component
             )
             algo_name.change(
-                fn=filter_df,
                 inputs=[checkbox_group, algo_name, dataset_name, llm_name],
                 outputs=data_component
             )
             dataset_name.change(
-                fn=filter_df,
                 inputs=[checkbox_group, algo_name, dataset_name, llm_name],
                 outputs=data_component
             )
             llm_name.change(
-                fn=filter_df,
                 inputs=[checkbox_group, algo_name, dataset_name, llm_name],
                 outputs=data_component
             )

 # import pandas as pd
 # pd.set_option('display.max_colwidth', 0)
+# head_style = """
+# <style>
+# @media (min-width: 1536px)
+# {
+#     .gradio-container {
+#         min-width: var(--size-full) !important;
+#     }
+# }
+# /* 添加复选框样式 */
+# .gr-checkbox {
+#     accent-color: rgb(59, 130, 246) !important;  /* 蓝色 */
+# }
+# .gr-checkbox-group label input[type="checkbox"] {
+#     accent-color: rgb(59, 130, 246) !important;
+# }
+# .gr-checkbox-group input[type="checkbox"]:checked {
+#     background-color: rgb(59, 130, 246) !important;
+#     border-color: rgb(59, 130, 246) !important;
+# }
+# </style>
+# """
+with gr.Blocks(title="Open Agent Leaderboard") as demo:
     struct = load_results(OVERALL_MATH_SCORE_FILE)
     timestamp = struct['time']
     EVAL_TIME = format_timestamp(timestamp)
         with gr.Tab(label='🏅 Open Agent Overall Math Leaderboard'):
             gr.Markdown(LEADERBOARD_MD['MATH_MAIN'])
             check_box = BUILD_L1_DF(results, DEFAULT_MATH_BENCH)
+            overall_table = generate_table(results, DEFAULT_MATH_BENCH)
             type_map = check_box['type_map']
             type_map['Rank'] = 'number'
                 interactive=True,
             )
             initial_headers = ['Rank'] + check_box['essential'] + checkbox_group.value
+            available_headers = [h for h in initial_headers if h in overall_table.columns]
             data_component = gr.components.DataFrame(
+                value=overall_table[available_headers],
                 type='pandas',
                 datatype=[type_map[x] for x in available_headers],
                 interactive=False,
             def filter_df(fields, *args):
                 headers = ['Rank'] + check_box['essential'] + fields
+                # df = overall_table.copy()
                 # 确保所有请求的列都存在
+                available_headers = [h for h in headers if h in overall_table.columns]
+                original_columns = overall_table.columns.tolist()
+                available_headers = sorted(available_headers, key=lambda x: original_columns.index(x))
                 # 如果没有可用的列，返回一个带有基本列的空DataFrame
                 if not available_headers:
                     available_headers = ['Rank'] + check_box['essential']
                 comp = gr.components.DataFrame(
+                    value=overall_table[available_headers],
                     type='pandas',
                     datatype=[type_map[x] for x in available_headers],
                     interactive=False,
                     visible=True)
                 return comp
             checkbox_group.change(
                 fn=filter_df,
                 inputs=[checkbox_group],
             table, check_box = BUILD_L2_DF(results_detail, DEFAULT_MATH_BENCH)
             # table = generate_table_detail(results_detail, DEFAULT_MATH_BENCH)
             type_map = check_box['type_map']
             type_map['Rank'] = 'number'
                 wrap=True,
                 visible=True)
+            def filter_df2(fields, algos, datasets, llms):
+                headers =  ['Rank'] + fields
                 df = table.copy()
                 # 过滤数据
                     # 确保排名为整数
                     df['Rank'] = df['Rank'].astype(int)
+                original_columns = df.columns.tolist()
+                headers = sorted(headers, key=lambda x: original_columns.index(x))
                 comp = gr.components.DataFrame(
                     value=df[headers],
                     type='pandas',
             # 为所有复选框组添加change事件
             checkbox_group.change(
+                fn=filter_df2,
                 inputs=[checkbox_group, algo_name, dataset_name, llm_name],
                 outputs=data_component
             )
             algo_name.change(
+                fn=filter_df2,
                 inputs=[checkbox_group, algo_name, dataset_name, llm_name],
                 outputs=data_component
             )
             dataset_name.change(
+                fn=filter_df2,
                 inputs=[checkbox_group, algo_name, dataset_name, llm_name],
                 outputs=data_component
             )
             llm_name.change(
+                fn=filter_df2,
                 inputs=[checkbox_group, algo_name, dataset_name, llm_name],
                 outputs=data_component
             )

gen_table.py CHANGED Viewed

@@ -33,15 +33,33 @@ def nth_large(val, vals):
 def BUILD_L1_DF(results, fields):
     check_box = {}
     check_box['essential'] = ['Algorithm', 'LLM', 'Eval Date']
-    # 修改这里：确保列名与generate_table函数生成的列名一致
-    check_box['required'] = ['Avg Score'] + [f for field in fields for f in [f'{field}-Score', f'{field}-Cost($)']]
-    check_box['avg'] = ['Avg Score']
-    check_box['all'] = check_box['avg'] + [f for field in fields for f in [f'{field}-Score', f'{field}-Cost($)']]
     type_map = defaultdict(lambda: 'number')
     type_map['Algorithm'] = 'html'
     type_map['LLM'] = type_map['Vision Model'] = 'html'
     type_map['Eval Date'] = 'str'
     check_box['type_map'] = type_map
     return check_box
@@ -115,58 +133,48 @@ def generate_table(results, fields):
         for k in META_FIELDS:
             res[k].append(meta[k])
         scores, costs = [], []
         for d in fields:
-            if d in item.keys():
-                res[d+"-Score"].append(item[d]["Score"])
-                res[d+"-Cost($)"].append(item[d]["Cost($)"])
-                scores.append(item[d]["Score"])
-                costs.append(item[d]["Cost($)"])
             else:
-                res[d+"-Score"].append(None)
-                res[d+"-Cost($)"].append(None)
-                scores.append(None)
-                costs.append(None)
-        res['Avg Score'].append(round(np.mean(scores), 2) if None not in scores else None)
     df = pd.DataFrame(res)
-    # Sort by Avg Score and assign rank
     valid = df[~pd.isna(df['Avg Score'])].copy()
     missing = df[pd.isna(df['Avg Score'])].copy()
-    # Assign rank to valid rows (using integer type)
     valid = valid.sort_values('Avg Score', ascending=False)
-    valid['Rank'] = pd.Series(range(1, len(valid) + 1)[::-1], dtype=int)
-    # Assign last rank to missing rows (using integer type)
     if not missing.empty:
-        missing['Rank'] = pd.Series([len(valid) + 1] * len(missing), dtype=int)
-    # Merge and sort by Rank
     df = pd.concat([valid, missing])
     df = df.sort_values('Rank')
-    # Rearrange column order to ensure Rank is the first column
-    columns = ['Rank', 'Algorithm', 'LLM', 'Eval Date', 'Avg Score']  # Fixed column order
     for d in fields:
-        columns.extend([f"{d}-Score", f"{d}-Cost($)"])  # Add dataset-related columns
-    # Ensure all columns exist and reorder
     existing_columns = [col for col in columns if col in df.columns]
-    remaining_columns = [col for col in df.columns if col not in columns]
-    df = df[existing_columns + remaining_columns]  # Reorder columns
-    # Sort by Score in descending order
-    df = df.sort_values(['Avg Score'], ascending=[False])
-    # Add rank for each dataset separately
-    df['Rank'] = range(1, len(df) + 1)
-    # Rearrange column order
-    columns = ['Rank', 'Algorithm', 'LLM', 'Eval Date', 'Avg Score']
-    remaining_columns = [col for col in df.columns if col not in columns]
-    df = df[columns + remaining_columns]
     return df

 def BUILD_L1_DF(results, fields):
     check_box = {}
     check_box['essential'] = ['Algorithm', 'LLM', 'Eval Date']
+    # 首先检查实际的数据结构中有哪些列
+    sample_data = next(iter(results.values()))
+    available_fields = []
+    for field in fields:
+        if field in sample_data:
+            available_fields.append(field)
+    # 构建列名，确保与generate_table函数中的列名完全一致
+    score_columns = [f"{field}-Score" for field in available_fields]
+    cost_columns = [f"{field}-Cost($)" for field in available_fields]
+    combined_columns = score_columns + cost_columns
+    combined_columns_sorted = sorted(combined_columns, key=lambda x: x.split('-')[0])
+    check_box['required'] = ['Avg Score'] + combined_columns_sorted
+    check_box['all'] = ['Avg Score'] + combined_columns_sorted
     type_map = defaultdict(lambda: 'number')
     type_map['Algorithm'] = 'html'
     type_map['LLM'] = type_map['Vision Model'] = 'html'
     type_map['Eval Date'] = 'str'
+    type_map['Avg Score'] = 'number'
+    type_map['gsm8k-Score'] = 'number'
+    type_map['AQuA-Score'] = 'number'
+    type_map['gsm8k-Cost($)'] = 'number'
+    type_map['AQuA-Cost($)'] = 'number'
     check_box['type_map'] = type_map
     return check_box
         for k in META_FIELDS:
             res[k].append(meta[k])
         scores, costs = [], []
+        # 确保列名格式与BUILD_L1_DF中的一致
         for d in fields:
+            if d in item:
+                score = item[d].get("Score")
+                cost = item[d].get("Cost($)")
+                res[f"{d}-Score"].append(score)
+                res[f"{d}-Cost($)"].append(cost)
+                if score is not None:
+                    scores.append(score)
+                if cost is not None:
+                    costs.append(cost)
             else:
+                res[f"{d}-Score"].append(None)
+                res[f"{d}-Cost($)"].append(None)
+        # 计算平均分
+        res['Avg Score'].append(round(np.mean(scores), 2) if scores else None)
     df = pd.DataFrame(res)
+    # 排序和排名逻辑保持不变
     valid = df[~pd.isna(df['Avg Score'])].copy()
     missing = df[pd.isna(df['Avg Score'])].copy()
     valid = valid.sort_values('Avg Score', ascending=False)
+    valid['Rank'] = range(1, len(valid) + 1)
     if not missing.empty:
+        missing['Rank'] = len(valid) + 1
     df = pd.concat([valid, missing])
     df = df.sort_values('Rank')
+    # 重新排列列顺序
+    columns = ['Rank', 'Algorithm', 'LLM', 'Eval Date', 'Avg Score']
     for d in fields:
+        columns.extend([f"{d}-Score", f"{d}-Cost($)"])
     existing_columns = [col for col in columns if col in df.columns]
+    df = df[existing_columns]
     return df