yzabc007 committed on
Commit
2f5cc84
1 Parent(s): 841a40d

Update space

Browse files
Files changed (2) hide show
  1. app.py +18 -1
  2. src/populate.py +9 -4
app.py CHANGED
@@ -156,6 +156,7 @@ with demo:
156
  AutoEvalColumn.rank_math_probability.name,
157
  AutoEvalColumn.rank_reason_logical.name,
158
  AutoEvalColumn.rank_reason_social.name,
 
159
  ],
160
  rank_col=[],
161
  )
@@ -313,7 +314,8 @@ with demo:
313
 
314
  with gr.TabItem("🔬 Science", elem_id="science-table", id=4):
315
  CURRENT_TEXT = """
316
- We are working on adding more tasks on scientific domains to the leaderboard. The forthcoming ones are biology, chemistry, and physics.
 
317
  We have diversely and aggressively collected recent science datasets, including but not limited to
318
  [GPQA](https://arxiv.org/abs/2311.12022),
319
  [JEEBench](https://aclanthology.org/2023.emnlp-main.468/),
@@ -323,6 +325,7 @@ with demo:
323
  [SciEval](https://arxiv.org/abs/2308.13149).
324
  """
325
  gr.Markdown(CURRENT_TEXT, elem_classes="markdown-text")
 
326
  with gr.TabItem("🧪 Chemistry", elem_id="chemistry_subtab", id=0, elem_classes="subtab"):
327
  leaderboard = overall_leaderboard(
328
  get_model_leaderboard_df(
@@ -340,6 +343,20 @@ with demo:
340
  )
341
  )
342
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
  with gr.TabItem("</> Coding", elem_id="coding-table", id=5):
344
  CURRENT_TEXT = """
345
  # Coming soon!
 
156
  AutoEvalColumn.rank_math_probability.name,
157
  AutoEvalColumn.rank_reason_logical.name,
158
  AutoEvalColumn.rank_reason_social.name,
159
+ # AutoEvalColumn.rank_chemistry.name,
160
  ],
161
  rank_col=[],
162
  )
 
314
 
315
  with gr.TabItem("🔬 Science", elem_id="science-table", id=4):
316
  CURRENT_TEXT = """
317
+ Science domain is a critical area for evaluating LLMs.
318
+ We are working on adding several tasks on scientific domains to the leaderboard. The forthcoming ones are biology, chemistry, and physics.
319
  We have diversely and aggressively collected recent science datasets, including but not limited to
320
  [GPQA](https://arxiv.org/abs/2311.12022),
321
  [JEEBench](https://aclanthology.org/2023.emnlp-main.468/),
 
325
  [SciEval](https://arxiv.org/abs/2308.13149).
326
  """
327
  gr.Markdown(CURRENT_TEXT, elem_classes="markdown-text")
328
+
329
  with gr.TabItem("🧪 Chemistry", elem_id="chemistry_subtab", id=0, elem_classes="subtab"):
330
  leaderboard = overall_leaderboard(
331
  get_model_leaderboard_df(
 
343
  )
344
  )
345
 
346
+ with gr.TabItem("⚛️ Physics", elem_id="physics_subtab", id=1, elem_classes="subtab"):
347
+ CURRENT_TEXT = """
348
+ # Coming soon!
349
+ """
350
+ gr.Markdown(CURRENT_TEXT, elem_classes="markdown-text")
351
+
352
+
353
+ with gr.TabItem("🧬 Biology", elem_id="biology_subtab", id=2, elem_classes="subtab"):
354
+ CURRENT_TEXT = """
355
+ # Coming soon!
356
+ """
357
+ gr.Markdown(CURRENT_TEXT, elem_classes="markdown-text")
358
+
359
+
360
  with gr.TabItem("</> Coding", elem_id="coding-table", id=5):
361
  CURRENT_TEXT = """
362
  # Coming soon!
src/populate.py CHANGED
@@ -25,11 +25,16 @@ def get_model_leaderboard_df(results_path: str, requests_path: str="", cols: lis
25
  df = df.dropna(subset=benchmark_cols)
26
  df = df.sort_values(by=[rank_col[0]], ascending=True)
27
  # print(rank_col)
28
- else: # when rank_col is empty, sort by averaging all the benchmarks, except the first one
29
- avg_rank = df.iloc[:, 1:].mean(axis=1) # we'll skip NaN, instead of deleting the whole row
30
- df["Average Rank"] = avg_rank
31
- df = df.sort_values(by=["Average Rank"], ascending=True)
 
 
 
 
32
  df = df.fillna('--')
 
33
  rank = np.arange(1, len(df)+1)
34
  df.insert(0, 'Rank', rank)
35
 
 
25
  df = df.dropna(subset=benchmark_cols)
26
  df = df.sort_values(by=[rank_col[0]], ascending=True)
27
  # print(rank_col)
28
+ else:
29
+ # when rank_col is empty, sort by the row-wise mean of every column except the first one
30
+ avg_rank = df.iloc[:, 1:].mean(axis=1)
31
+ df["Average Rank"] = avg_rank.round(decimals=4)
32
+ df = df.sort_values(by=["Average Rank"], ascending=True)
33
+ df["Average Rank"] = df["Average Rank"].map('{:.4f}'.format)
34
+
35
+ # rows containing NaN are kept (NaN is shown as '--') instead of deleting the whole row
36
  df = df.fillna('--')
37
+ # insert a rank column
38
  rank = np.arange(1, len(df)+1)
39
  df.insert(0, 'Rank', rank)
40