albertvillanova HF staff committed on
Commit
ddc25db
1 Parent(s): a0691fa

Add task selector

Browse files
Files changed (1) hide show
  1. app.py +45 -12
app.py CHANGED
@@ -33,6 +33,16 @@ DEFAULT_HTML_TABLE = """
33
  </table>
34
  """
35
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  fs = HfFileSystem()
38
 
@@ -113,6 +123,11 @@ def concat_result_1(result_1, results):
113
  pd.concat([result_1, results.iloc[:, [0, 2]].set_index("Parameters")], axis=1)
114
  .reset_index()
115
  )
 
 
 
 
 
116
  return (
117
  df.style
118
  .format(na_rep="")
@@ -127,22 +142,28 @@ def concat_result_2(result_2, results):
127
  pd.concat([results.iloc[:, [0, 1]].set_index("Parameters"), result_2], axis=1)
128
  .reset_index()
129
  )
130
- return (
131
- df.style
132
- .format(na_rep="")
133
- .hide(axis="index")
134
- .to_html()
135
- )
136
 
137
 
138
- def render_result_1(model_id, *results):
139
  result = load_result(model_id)
140
- return [concat_result_1(*result_args) for result_args in zip(result, results)]
 
 
 
141
 
142
 
143
- def render_result_2(model_id, *results):
144
  result = load_result(model_id)
145
- return [concat_result_2(*result_args) for result_args in zip(result, results)]
 
 
 
 
 
 
 
 
146
 
147
 
148
  # if __name__ == "__main__":
@@ -159,6 +180,13 @@ with gr.Blocks(fill_height=True) as demo:
159
  with gr.Column():
160
  model_id_2 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Results")
161
  load_btn_2 = gr.Button("Load")
 
 
 
 
 
 
 
162
 
163
  results = []
164
  with gr.Row():
@@ -192,12 +220,17 @@ with gr.Blocks(fill_height=True) as demo:
192
 
193
  load_btn_1.click(
194
  fn=render_result_1,
195
- inputs=[model_id_1, *results],
196
  outputs=[*results],
197
  )
198
  load_btn_2.click(
199
  fn=render_result_2,
200
- inputs=[model_id_2, *results],
 
 
 
 
 
201
  outputs=[*results],
202
  )
203
 
 
33
  </table>
34
  """
35
 
36
+ TASKS = {
37
+ "leaderboard_arc_challenge": ("ARC", "leaderboard_arc_challenge"),
38
+ "leaderboard_bbh": ("BBH", "leaderboard_bbh"),
39
+ "leaderboard_gpqa": ("GPQA", "leaderboard_gpqa"),
40
+ "leaderboard_ifeval": ("IFEval", "leaderboard_ifeval"),
41
+ "leaderboard_math_hard": ("MATH", "leaderboard_math"),
42
+ "leaderboard_mmlu": ("MMLU", "leaderboard_mmlu"),
43
+ "leaderboard_mmlu_pro": ("MMLU-Pro", "leaderboard_mmlu_pro"),
44
+ "leaderboard_musr": ("MuSR", "leaderboard_musr"),
45
+ }
46
 
47
  fs = HfFileSystem()
48
 
 
123
  pd.concat([result_1, results.iloc[:, [0, 2]].set_index("Parameters")], axis=1)
124
  .reset_index()
125
  )
126
+ return df
127
+
128
+
129
+ def display_dataframe(df):
130
+ # style = Styler(df, uuid_len=0, cell_ids=False)
131
  return (
132
  df.style
133
  .format(na_rep="")
 
142
  pd.concat([results.iloc[:, [0, 1]].set_index("Parameters"), result_2], axis=1)
143
  .reset_index()
144
  )
145
+ return df
 
 
 
 
 
146
 
147
 
148
+ def render_result_1(model_id, task, *results):
149
  result = load_result(model_id)
150
+ concat_results = [concat_result_1(*result_args) for result_args in zip(result, results)]
151
+ if task:
152
+ concat_results = [df[df["Parameters"].str.startswith(task[len("leaderboard_"):])] for df in concat_results]
153
+ return [display_dataframe(df) for df in concat_results]
154
 
155
 
156
+ def render_result_2(model_id, task, *results):
157
  result = load_result(model_id)
158
+ concat_results = [concat_result_2(*result_args) for result_args in zip(result, results)]
159
+ if task:
160
+ concat_results = [df[df["Parameters"].str.startswith(task[len("leaderboard_"):])] for df in concat_results]
161
+ return [display_dataframe(df) for df in concat_results]
162
+
163
+
164
+ def render_results(model_id_1, model_id_2, task, *results):
165
+ results = render_result_1(model_id_1, task, *results)
166
+ return render_result_2(model_id_2, task, *results)
167
 
168
 
169
  # if __name__ == "__main__":
 
180
  with gr.Column():
181
  model_id_2 = gr.Dropdown(choices=list(latest_result_path_per_model.keys()), label="Results")
182
  load_btn_2 = gr.Button("Load")
183
+ with gr.Row():
184
+ task = gr.Radio(
185
+ ["All"] + list(TASKS.values()),
186
+ label="Tasks",
187
+ info="Evaluation tasks to be displayed",
188
+ value="All",
189
+ )
190
 
191
  results = []
192
  with gr.Row():
 
220
 
221
  load_btn_1.click(
222
  fn=render_result_1,
223
+ inputs=[model_id_1, task, *results],
224
  outputs=[*results],
225
  )
226
  load_btn_2.click(
227
  fn=render_result_2,
228
+ inputs=[model_id_2, task, *results],
229
+ outputs=[*results],
230
+ )
231
+ task.change(
232
+ fn=render_results,
233
+ inputs=[model_id_1, model_id_2, task, *results],
234
  outputs=[*results],
235
  )
236