yzabc007 committed on
Commit
c156aae
·
1 Parent(s): efb8c59

Update space

Browse files
Files changed (2) hide show
  1. app.py +14 -5
  2. src/populate.py +23 -9
app.py CHANGED
@@ -151,6 +151,15 @@ with demo:
151
  '</p>'
152
  )
153
  gr.HTML(INTRODUCTION_TEXT)
 
 
 
 
 
 
 
 
 
154
 
155
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
156
 
@@ -256,7 +265,7 @@ with demo:
256
  AutoEvalColumn.rank_math_geometry.name,
257
  AutoEvalColumn.rank_math_probability.name,
258
  ],
259
- rank_col=['sort_by_rank', 1, 4],
260
  )
261
  )
262
 
@@ -277,7 +286,7 @@ with demo:
277
  # AutoEvalColumn.rank_math_geometry.name,
278
  # AutoEvalColumn.rank_math_probability.name,
279
  ],
280
- rank_col=['sort_by_score', 1, 4],
281
  )
282
  )
283
 
@@ -389,7 +398,7 @@ with demo:
389
  AutoEvalColumn.rank_reason_logical.name,
390
  AutoEvalColumn.rank_reason_social.name,
391
  ],
392
- rank_col=['sort_by_rank', 1, 3],
393
  )
394
  )
395
 
@@ -406,7 +415,7 @@ with demo:
406
  AutoEvalColumn.score_reason_logical.name,
407
  AutoEvalColumn.score_reason_social.name,
408
  ],
409
- rank_col=['sort_by_score', 1, 3],
410
  )
411
  )
412
 
@@ -488,7 +497,7 @@ with demo:
488
 
489
  AutoEvalColumn.rank_chemistry.name,
490
  ],
491
- rank_col=['sort_by_rank', 4, 5],
492
  )
493
  )
494
 
 
151
  '</p>'
152
  )
153
  gr.HTML(INTRODUCTION_TEXT)
154
+
155
+ '''
156
+ TEXT = (
157
+ '<p style="font-size:{INTRODUCTION_TEXT_FONT_SIZE}px;">'
158
+ ''
159
+ '</p>'
160
+ )
161
+ gr.HTML(TEXT)
162
+ '''
163
 
164
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
165
 
 
265
  AutoEvalColumn.rank_math_geometry.name,
266
  AutoEvalColumn.rank_math_probability.name,
267
  ],
268
+ rank_col=['sort_by_rank', 1, 4, 'Math'],
269
  )
270
  )
271
 
 
286
  # AutoEvalColumn.rank_math_geometry.name,
287
  # AutoEvalColumn.rank_math_probability.name,
288
  ],
289
+ rank_col=['sort_by_score', 1, 4, 'Math'],
290
  )
291
  )
292
 
 
398
  AutoEvalColumn.rank_reason_logical.name,
399
  AutoEvalColumn.rank_reason_social.name,
400
  ],
401
+ rank_col=['sort_by_rank', 1, 3, 'Reasoning'],
402
  )
403
  )
404
 
 
415
  AutoEvalColumn.score_reason_logical.name,
416
  AutoEvalColumn.score_reason_social.name,
417
  ],
418
+ rank_col=['sort_by_score', 1, 3, 'Reasoning'],
419
  )
420
  )
421
 
 
497
 
498
  AutoEvalColumn.rank_chemistry.name,
499
  ],
500
+ rank_col=['sort_by_rank', 4, 5, 'Science'],
501
  )
502
  )
503
 
src/populate.py CHANGED
@@ -42,11 +42,17 @@ def get_model_leaderboard_df(results_path: str, requests_path: str="", cols: lis
42
  start_idx = rank_col[1]
43
  end_idx = rank_col[2]
44
  avg_scores = df.iloc[:, start_idx:end_idx].mean(axis=1)
45
- df.insert(1, "Average Score", avg_scores)
 
 
 
 
 
 
46
 
47
- df["Average Score"] = avg_scores.round(decimals=4)
48
- df = df.sort_values(by=["Average Score"], ascending=False)
49
- df["Average Score"] = df["Average Score"].map('{:.2f}'.format)
50
 
51
  # df = df.drop(columns=benchmark_cols[offset_idx:])
52
  # print(benchmark_cols)
@@ -68,11 +74,18 @@ def get_model_leaderboard_df(results_path: str, requests_path: str="", cols: lis
68
  start_idx = rank_col[1]
69
  end_idx = rank_col[2]
70
  avg_rank = df.iloc[:, start_idx:end_idx].mean(axis=1)
71
- df.insert(1, "Average Rank", avg_rank)
72
 
73
- df["Average Rank"] = avg_rank.round(decimals=4)
74
- df = df.sort_values(by=["Average Rank"], ascending=True)
75
- df["Average Rank"] = df["Average Rank"].map('{:.2f}'.format)
 
 
 
 
 
 
 
 
76
 
77
  # we'll skip NaN, instead of deleting the whole row
78
  df = df.fillna('--')
@@ -80,7 +93,8 @@ def get_model_leaderboard_df(results_path: str, requests_path: str="", cols: lis
80
  rank = np.arange(1, len(df)+1)
81
  df.insert(0, 'Rank', rank)
82
 
83
- df.style.background_gradient(cmap='coolwarm', subset=benchmark_cols)
 
84
 
85
 
86
 
 
42
  start_idx = rank_col[1]
43
  end_idx = rank_col[2]
44
  avg_scores = df.iloc[:, start_idx:end_idx].mean(axis=1)
45
+ if len(rank_col) == 4:
46
+ avg_col_name = f"Overall ({rank_col[3]})"
47
+ else:
48
+ # avg_col_name = "Average Score"
49
+ avg_col_name = 'Overall'
50
+
51
+ df.insert(1, avg_col_name, avg_scores)
52
 
53
+ df[avg_col_name] = avg_scores.round(decimals=4)
54
+ df = df.sort_values(by=[avg_col_name], ascending=False)
55
+ df[avg_col_name] = df[avg_col_name].map('{:.2f}'.format)
56
 
57
  # df = df.drop(columns=benchmark_cols[offset_idx:])
58
  # print(benchmark_cols)
 
74
  start_idx = rank_col[1]
75
  end_idx = rank_col[2]
76
  avg_rank = df.iloc[:, start_idx:end_idx].mean(axis=1)
 
77
 
78
+ if len(rank_col) == 4:
79
+ avg_col_name = f"Overall ({rank_col[3]})"
80
+ else:
81
+ # avg_col_name = "Average Rank"
82
+ avg_col_name = 'Overall'
83
+
84
+ df.insert(1, avg_col_name, avg_rank)
85
+
86
+ df[avg_col_name] = avg_rank.round(decimals=4)
87
+ df = df.sort_values(by=[avg_col_name], ascending=True)
88
+ df[avg_col_name] = df[avg_col_name].map('{:.2f}'.format)
89
 
90
  # we'll skip NaN, instead of deleting the whole row
91
  df = df.fillna('--')
 
93
  rank = np.arange(1, len(df)+1)
94
  df.insert(0, 'Rank', rank)
95
 
96
+ # print(benchmark_cols)
97
+ # df.style.background_gradient(cmap='coolwarm', subset=benchmark_cols)
98
 
99
 
100