qq-hzlh committed on
Commit
e90e797
·
1 Parent(s): 8d8059b

fix filter bug

Browse files
Files changed (2) hide show
  1. app.py +47 -43
  2. gen_table.py +44 -36
app.py CHANGED
@@ -7,32 +7,32 @@ from meta_data import *
7
  # import pandas as pd
8
  # pd.set_option('display.max_colwidth', 0)
9
 
10
- head_style = """
11
- <style>
12
- @media (min-width: 1536px)
13
- {
14
- .gradio-container {
15
- min-width: var(--size-full) !important;
16
- }
17
- }
18
-
19
- /* 添加复选框样式 */
20
- .gr-checkbox {
21
- accent-color: rgb(59, 130, 246) !important; /* 蓝色 */
22
- }
23
-
24
- .gr-checkbox-group label input[type="checkbox"] {
25
- accent-color: rgb(59, 130, 246) !important;
26
- }
27
-
28
- .gr-checkbox-group input[type="checkbox"]:checked {
29
- background-color: rgb(59, 130, 246) !important;
30
- border-color: rgb(59, 130, 246) !important;
31
- }
32
- </style>
33
- """
34
-
35
- with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
36
  struct = load_results(OVERALL_MATH_SCORE_FILE)
37
  timestamp = struct['time']
38
  EVAL_TIME = format_timestamp(timestamp)
@@ -51,7 +51,7 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
51
  with gr.Tab(label='🏅 Open Agent Overall Math Leaderboard'):
52
  gr.Markdown(LEADERBOARD_MD['MATH_MAIN'])
53
  check_box = BUILD_L1_DF(results, DEFAULT_MATH_BENCH)
54
- table = generate_table(results, DEFAULT_MATH_BENCH)
55
 
56
  type_map = check_box['type_map']
57
  type_map['Rank'] = 'number'
@@ -63,12 +63,11 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
63
  interactive=True,
64
  )
65
 
66
- # 修改这里:确保初始显示的列都存在于表格中
67
  initial_headers = ['Rank'] + check_box['essential'] + checkbox_group.value
68
- available_headers = [h for h in initial_headers if h in table.columns]
69
-
70
  data_component = gr.components.DataFrame(
71
- value=table[available_headers], # 只使用可用的列
72
  type='pandas',
73
  datatype=[type_map[x] for x in available_headers],
74
  interactive=False,
@@ -77,17 +76,21 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
77
 
78
  def filter_df(fields, *args):
79
  headers = ['Rank'] + check_box['essential'] + fields
80
- df = table.copy()
81
-
82
  # 确保所有请求的列都存在
83
- available_headers = [h for h in headers if h in df.columns]
 
 
 
 
84
 
85
  # 如果没有可用的列,返回一个带有基本列的空DataFrame
86
  if not available_headers:
87
  available_headers = ['Rank'] + check_box['essential']
88
 
89
  comp = gr.components.DataFrame(
90
- value=df[available_headers],
91
  type='pandas',
92
  datatype=[type_map[x] for x in available_headers],
93
  interactive=False,
@@ -95,7 +98,6 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
95
  visible=True)
96
  return comp
97
 
98
- # checkbox_group的change事件只需要传入checkbox_group
99
  checkbox_group.change(
100
  fn=filter_df,
101
  inputs=[checkbox_group],
@@ -111,7 +113,6 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
111
 
112
  table, check_box = BUILD_L2_DF(results_detail, DEFAULT_MATH_BENCH)
113
  # table = generate_table_detail(results_detail, DEFAULT_MATH_BENCH)
114
-
115
  type_map = check_box['type_map']
116
  type_map['Rank'] = 'number'
117
 
@@ -154,8 +155,8 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
154
  wrap=True,
155
  visible=True)
156
 
157
- def filter_df(fields, algos, datasets, llms):
158
- headers = ['Rank'] + check_box['essential'] + fields
159
  df = table.copy()
160
 
161
  # 过滤数据
@@ -176,6 +177,9 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
176
  # 确保排名为整数
177
  df['Rank'] = df['Rank'].astype(int)
178
 
 
 
 
179
  comp = gr.components.DataFrame(
180
  value=df[headers],
181
  type='pandas',
@@ -187,25 +191,25 @@ with gr.Blocks(title="Open Agent Leaderboard", head=head_style) as demo:
187
 
188
  # 为所有复选框组添加change事件
189
  checkbox_group.change(
190
- fn=filter_df,
191
  inputs=[checkbox_group, algo_name, dataset_name, llm_name],
192
  outputs=data_component
193
  )
194
 
195
  algo_name.change(
196
- fn=filter_df,
197
  inputs=[checkbox_group, algo_name, dataset_name, llm_name],
198
  outputs=data_component
199
  )
200
 
201
  dataset_name.change(
202
- fn=filter_df,
203
  inputs=[checkbox_group, algo_name, dataset_name, llm_name],
204
  outputs=data_component
205
  )
206
 
207
  llm_name.change(
208
- fn=filter_df,
209
  inputs=[checkbox_group, algo_name, dataset_name, llm_name],
210
  outputs=data_component
211
  )
 
7
  # import pandas as pd
8
  # pd.set_option('display.max_colwidth', 0)
9
 
10
+ # head_style = """
11
+ # <style>
12
+ # @media (min-width: 1536px)
13
+ # {
14
+ # .gradio-container {
15
+ # min-width: var(--size-full) !important;
16
+ # }
17
+ # }
18
+
19
+ # /* 添加复选框样式 */
20
+ # .gr-checkbox {
21
+ # accent-color: rgb(59, 130, 246) !important; /* 蓝色 */
22
+ # }
23
+
24
+ # .gr-checkbox-group label input[type="checkbox"] {
25
+ # accent-color: rgb(59, 130, 246) !important;
26
+ # }
27
+
28
+ # .gr-checkbox-group input[type="checkbox"]:checked {
29
+ # background-color: rgb(59, 130, 246) !important;
30
+ # border-color: rgb(59, 130, 246) !important;
31
+ # }
32
+ # </style>
33
+ # """
34
+
35
+ with gr.Blocks(title="Open Agent Leaderboard") as demo:
36
  struct = load_results(OVERALL_MATH_SCORE_FILE)
37
  timestamp = struct['time']
38
  EVAL_TIME = format_timestamp(timestamp)
 
51
  with gr.Tab(label='🏅 Open Agent Overall Math Leaderboard'):
52
  gr.Markdown(LEADERBOARD_MD['MATH_MAIN'])
53
  check_box = BUILD_L1_DF(results, DEFAULT_MATH_BENCH)
54
+ overall_table = generate_table(results, DEFAULT_MATH_BENCH)
55
 
56
  type_map = check_box['type_map']
57
  type_map['Rank'] = 'number'
 
63
  interactive=True,
64
  )
65
 
 
66
  initial_headers = ['Rank'] + check_box['essential'] + checkbox_group.value
67
+ available_headers = [h for h in initial_headers if h in overall_table.columns]
68
+
69
  data_component = gr.components.DataFrame(
70
+ value=overall_table[available_headers],
71
  type='pandas',
72
  datatype=[type_map[x] for x in available_headers],
73
  interactive=False,
 
76
 
77
  def filter_df(fields, *args):
78
  headers = ['Rank'] + check_box['essential'] + fields
79
+ # df = overall_table.copy()
80
+
81
  # 确保所有请求的列都存在
82
+ available_headers = [h for h in headers if h in overall_table.columns]
83
+
84
+ original_columns = overall_table.columns.tolist()
85
+ available_headers = sorted(available_headers, key=lambda x: original_columns.index(x))
86
+
87
 
88
  # 如果没有可用的列,返回一个带有基本列的空DataFrame
89
  if not available_headers:
90
  available_headers = ['Rank'] + check_box['essential']
91
 
92
  comp = gr.components.DataFrame(
93
+ value=overall_table[available_headers],
94
  type='pandas',
95
  datatype=[type_map[x] for x in available_headers],
96
  interactive=False,
 
98
  visible=True)
99
  return comp
100
 
 
101
  checkbox_group.change(
102
  fn=filter_df,
103
  inputs=[checkbox_group],
 
113
 
114
  table, check_box = BUILD_L2_DF(results_detail, DEFAULT_MATH_BENCH)
115
  # table = generate_table_detail(results_detail, DEFAULT_MATH_BENCH)
 
116
  type_map = check_box['type_map']
117
  type_map['Rank'] = 'number'
118
 
 
155
  wrap=True,
156
  visible=True)
157
 
158
+ def filter_df2(fields, algos, datasets, llms):
159
+ headers = ['Rank'] + fields
160
  df = table.copy()
161
 
162
  # 过滤数据
 
177
  # 确保排名为整数
178
  df['Rank'] = df['Rank'].astype(int)
179
 
180
+
181
+ original_columns = df.columns.tolist()
182
+ headers = sorted(headers, key=lambda x: original_columns.index(x))
183
  comp = gr.components.DataFrame(
184
  value=df[headers],
185
  type='pandas',
 
191
 
192
  # 为所有复选框组添加change事件
193
  checkbox_group.change(
194
+ fn=filter_df2,
195
  inputs=[checkbox_group, algo_name, dataset_name, llm_name],
196
  outputs=data_component
197
  )
198
 
199
  algo_name.change(
200
+ fn=filter_df2,
201
  inputs=[checkbox_group, algo_name, dataset_name, llm_name],
202
  outputs=data_component
203
  )
204
 
205
  dataset_name.change(
206
+ fn=filter_df2,
207
  inputs=[checkbox_group, algo_name, dataset_name, llm_name],
208
  outputs=data_component
209
  )
210
 
211
  llm_name.change(
212
+ fn=filter_df2,
213
  inputs=[checkbox_group, algo_name, dataset_name, llm_name],
214
  outputs=data_component
215
  )
gen_table.py CHANGED
@@ -33,15 +33,33 @@ def nth_large(val, vals):
33
  def BUILD_L1_DF(results, fields):
34
  check_box = {}
35
  check_box['essential'] = ['Algorithm', 'LLM', 'Eval Date']
36
- # 修改这里:确保列名与generate_table函数生成的列名一致
37
- check_box['required'] = ['Avg Score'] + [f for field in fields for f in [f'{field}-Score', f'{field}-Cost($)']]
38
- check_box['avg'] = ['Avg Score']
39
- check_box['all'] = check_box['avg'] + [f for field in fields for f in [f'{field}-Score', f'{field}-Cost($)']]
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
  type_map = defaultdict(lambda: 'number')
42
  type_map['Algorithm'] = 'html'
43
  type_map['LLM'] = type_map['Vision Model'] = 'html'
44
  type_map['Eval Date'] = 'str'
 
 
 
 
 
45
  check_box['type_map'] = type_map
46
 
47
  return check_box
@@ -115,58 +133,48 @@ def generate_table(results, fields):
115
  for k in META_FIELDS:
116
  res[k].append(meta[k])
117
  scores, costs = [], []
 
 
118
  for d in fields:
119
- if d in item.keys():
120
- res[d+"-Score"].append(item[d]["Score"])
121
- res[d+"-Cost($)"].append(item[d]["Cost($)"])
122
- scores.append(item[d]["Score"])
123
- costs.append(item[d]["Cost($)"])
 
 
 
 
124
  else:
125
- res[d+"-Score"].append(None)
126
- res[d+"-Cost($)"].append(None)
127
- scores.append(None)
128
- costs.append(None)
129
 
130
- res['Avg Score'].append(round(np.mean(scores), 2) if None not in scores else None)
 
131
 
132
  df = pd.DataFrame(res)
133
 
134
- # Sort by Avg Score and assign rank
135
  valid = df[~pd.isna(df['Avg Score'])].copy()
136
  missing = df[pd.isna(df['Avg Score'])].copy()
137
 
138
- # Assign rank to valid rows (using integer type)
139
  valid = valid.sort_values('Avg Score', ascending=False)
140
- valid['Rank'] = pd.Series(range(1, len(valid) + 1)[::-1], dtype=int)
141
 
142
- # Assign last rank to missing rows (using integer type)
143
  if not missing.empty:
144
- missing['Rank'] = pd.Series([len(valid) + 1] * len(missing), dtype=int)
145
 
146
- # Merge and sort by Rank
147
  df = pd.concat([valid, missing])
148
  df = df.sort_values('Rank')
149
 
150
- # Rearrange column order to ensure Rank is the first column
151
- columns = ['Rank', 'Algorithm', 'LLM', 'Eval Date', 'Avg Score'] # Fixed column order
152
  for d in fields:
153
- columns.extend([f"{d}-Score", f"{d}-Cost($)"]) # Add dataset-related columns
154
 
155
- # Ensure all columns exist and reorder
156
  existing_columns = [col for col in columns if col in df.columns]
157
- remaining_columns = [col for col in df.columns if col not in columns]
158
- df = df[existing_columns + remaining_columns] # Reorder columns
159
-
160
- # Sort by Score in descending order
161
- df = df.sort_values(['Avg Score'], ascending=[False])
162
 
163
- # Add rank for each dataset separately
164
- df['Rank'] = range(1, len(df) + 1)
165
-
166
- # Rearrange column order
167
- columns = ['Rank', 'Algorithm', 'LLM', 'Eval Date', 'Avg Score']
168
- remaining_columns = [col for col in df.columns if col not in columns]
169
- df = df[columns + remaining_columns]
170
  return df
171
 
172
 
 
33
  def BUILD_L1_DF(results, fields):
34
  check_box = {}
35
  check_box['essential'] = ['Algorithm', 'LLM', 'Eval Date']
36
+
37
+ # 首先检查实际的数据结构中有哪些列
38
+ sample_data = next(iter(results.values()))
39
+ available_fields = []
40
+ for field in fields:
41
+ if field in sample_data:
42
+ available_fields.append(field)
43
+
44
+ # 构建列名,确保与generate_table函数中的列名完全一致
45
+ score_columns = [f"{field}-Score" for field in available_fields]
46
+ cost_columns = [f"{field}-Cost($)" for field in available_fields]
47
+
48
+ combined_columns = score_columns + cost_columns
49
+ combined_columns_sorted = sorted(combined_columns, key=lambda x: x.split('-')[0])
50
+
51
+ check_box['required'] = ['Avg Score'] + combined_columns_sorted
52
+ check_box['all'] = ['Avg Score'] + combined_columns_sorted
53
 
54
  type_map = defaultdict(lambda: 'number')
55
  type_map['Algorithm'] = 'html'
56
  type_map['LLM'] = type_map['Vision Model'] = 'html'
57
  type_map['Eval Date'] = 'str'
58
+ type_map['Avg Score'] = 'number'
59
+ type_map['gsm8k-Score'] = 'number'
60
+ type_map['AQuA-Score'] = 'number'
61
+ type_map['gsm8k-Cost($)'] = 'number'
62
+ type_map['AQuA-Cost($)'] = 'number'
63
  check_box['type_map'] = type_map
64
 
65
  return check_box
 
133
  for k in META_FIELDS:
134
  res[k].append(meta[k])
135
  scores, costs = [], []
136
+
137
+ # 确保列名格式与BUILD_L1_DF中的一致
138
  for d in fields:
139
+ if d in item:
140
+ score = item[d].get("Score")
141
+ cost = item[d].get("Cost($)")
142
+ res[f"{d}-Score"].append(score)
143
+ res[f"{d}-Cost($)"].append(cost)
144
+ if score is not None:
145
+ scores.append(score)
146
+ if cost is not None:
147
+ costs.append(cost)
148
  else:
149
+ res[f"{d}-Score"].append(None)
150
+ res[f"{d}-Cost($)"].append(None)
 
 
151
 
152
+ # 计算平均分
153
+ res['Avg Score'].append(round(np.mean(scores), 2) if scores else None)
154
 
155
  df = pd.DataFrame(res)
156
 
157
+ # 排序和排名逻辑保持不变
158
  valid = df[~pd.isna(df['Avg Score'])].copy()
159
  missing = df[pd.isna(df['Avg Score'])].copy()
160
 
 
161
  valid = valid.sort_values('Avg Score', ascending=False)
162
+ valid['Rank'] = range(1, len(valid) + 1)
163
 
 
164
  if not missing.empty:
165
+ missing['Rank'] = len(valid) + 1
166
 
 
167
  df = pd.concat([valid, missing])
168
  df = df.sort_values('Rank')
169
 
170
+ # 重新排列列顺序
171
+ columns = ['Rank', 'Algorithm', 'LLM', 'Eval Date', 'Avg Score']
172
  for d in fields:
173
+ columns.extend([f"{d}-Score", f"{d}-Cost($)"])
174
 
 
175
  existing_columns = [col for col in columns if col in df.columns]
176
+ df = df[existing_columns]
 
 
 
 
177
 
 
 
 
 
 
 
 
178
  return df
179
 
180