Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
77ded94
·
verified ·
1 Parent(s): 3bab3e9

feat-add-toggle-button-for-revision-col-0514 (#3)

Browse files

- feat: add the toggle checkbox (ea7f641055b8b1920e6b13455ec5d3f48cef3a64)
- feat: add toggle checkbox for qa (fff686a88e3ddf8a1c85a594b174dfa51b97725f)
- feat: add toggle checkbox for long-doc (4ab9cec62b148e8ef70ac23e8205477d47d34a14)

Files changed (2) hide show
  1. app.py +60 -33
  2. src/utils.py +19 -10
app.py CHANGED
@@ -11,7 +11,7 @@ from src.about import (
11
  from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, METRIC_LIST, \
12
  DEFAULT_METRIC
13
  from src.display.css_html_js import custom_css
14
- from src.display.utils import COL_NAME_IS_ANONYMOUS
15
  from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
16
  from src.read_evals import get_raw_eval_results, get_leaderboard_df
17
  from src.utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols, submit_results
@@ -45,11 +45,13 @@ leaderboard_df_qa = original_df_qa.copy()
45
  shown_columns_qa, types_qa = get_default_cols(
46
  'qa', leaderboard_df_qa.columns, add_fix_cols=True)
47
  leaderboard_df_qa = leaderboard_df_qa[~leaderboard_df_qa[COL_NAME_IS_ANONYMOUS]][shown_columns_qa]
 
48
 
49
  leaderboard_df_long_doc = original_df_long_doc.copy()
50
  shown_columns_long_doc, types_long_doc = get_default_cols(
51
  'long-doc', leaderboard_df_long_doc.columns, add_fix_cols=True)
52
  leaderboard_df_long_doc = leaderboard_df_long_doc[~leaderboard_df_long_doc[COL_NAME_IS_ANONYMOUS]][shown_columns_long_doc]
 
53
 
54
 
55
  def update_metric_qa(
@@ -58,9 +60,10 @@ def update_metric_qa(
58
  langs: list,
59
  reranking_model: list,
60
  query: str,
61
- show_anonymous: bool
 
62
  ):
63
- return update_metric(raw_data, 'qa', metric, domains, langs, reranking_model, query, show_anonymous)
64
 
65
 
66
  def update_metric_long_doc(
@@ -69,9 +72,10 @@ def update_metric_long_doc(
69
  langs: list,
70
  reranking_model: list,
71
  query: str,
72
- show_anonymous: bool
 
73
  ):
74
- return update_metric(raw_data, "long-doc", metric, domains, langs, reranking_model, query, show_anonymous)
75
 
76
 
77
  def update_table_without_ranking(
@@ -80,9 +84,10 @@ def update_table_without_ranking(
80
  langs,
81
  reranking_query,
82
  query,
83
- show_anonymous
 
84
  ):
85
- return update_table(hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking=False)
86
 
87
 
88
  def update_table_without_ranking_long_doc(
@@ -91,9 +96,10 @@ def update_table_without_ranking_long_doc(
91
  langs,
92
  reranking_query,
93
  query,
94
- show_anonymous
 
95
  ):
96
- return update_table_long_doc(hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking=False)
97
 
98
 
99
  demo = gr.Blocks(css=custom_css)
@@ -157,6 +163,12 @@ with demo:
157
  value=False,
158
  info="The anonymous submissions might have invalid model information."
159
  )
 
 
 
 
 
 
160
 
161
  leaderboard_table = gr.components.Dataframe(
162
  value=leaderboard_df_qa,
@@ -187,18 +199,21 @@ with demo:
187
  leaderboard_table,
188
  )
189
 
190
- selected_rerankings.change(
191
- update_table_without_ranking,
192
- [
193
- hidden_leaderboard_table_for_search,
194
- selected_domains,
195
- selected_langs,
196
- selected_rerankings,
197
- search_bar,
198
- show_anonymous,
199
- ],
200
- leaderboard_table,
201
- )
 
 
 
202
 
203
  # Set column-wise listener
204
  for selector in [
@@ -288,6 +303,12 @@ with demo:
288
  value=False,
289
  info="The anonymous submissions might have invalid model information."
290
  )
 
 
 
 
 
 
291
 
292
  leaderboard_table_long_doc = gr.components.Dataframe(
293
  value=leaderboard_df_long_doc,
@@ -314,22 +335,26 @@ with demo:
314
  selected_rerankings,
315
  search_bar,
316
  show_anonymous,
 
317
  ],
318
  leaderboard_table_long_doc,
319
  )
320
 
321
- selected_rerankings.change(
322
- update_table_without_ranking_long_doc,
323
- [
324
- hidden_leaderboard_table_for_search,
325
- selected_domains,
326
- selected_langs,
327
- selected_rerankings,
328
- search_bar,
329
- show_anonymous,
330
- ],
331
- leaderboard_table_long_doc,
332
- )
 
 
 
333
 
334
  # Set column-wise listener
335
  for selector in [
@@ -344,6 +369,7 @@ with demo:
344
  selected_rerankings,
345
  search_bar,
346
  show_anonymous,
 
347
  ],
348
  leaderboard_table_long_doc,
349
  queue=True,
@@ -359,6 +385,7 @@ with demo:
359
  selected_rerankings,
360
  search_bar,
361
  show_anonymous,
 
362
  ],
363
  leaderboard_table_long_doc,
364
  queue=True
 
11
  from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, METRIC_LIST, \
12
  DEFAULT_METRIC
13
  from src.display.css_html_js import custom_css
14
+ from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP
15
  from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
16
  from src.read_evals import get_raw_eval_results, get_leaderboard_df
17
  from src.utils import update_table, update_metric, update_table_long_doc, upload_file, get_default_cols, submit_results
 
45
  shown_columns_qa, types_qa = get_default_cols(
46
  'qa', leaderboard_df_qa.columns, add_fix_cols=True)
47
  leaderboard_df_qa = leaderboard_df_qa[~leaderboard_df_qa[COL_NAME_IS_ANONYMOUS]][shown_columns_qa]
48
+ leaderboard_df_qa.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
49
 
50
  leaderboard_df_long_doc = original_df_long_doc.copy()
51
  shown_columns_long_doc, types_long_doc = get_default_cols(
52
  'long-doc', leaderboard_df_long_doc.columns, add_fix_cols=True)
53
  leaderboard_df_long_doc = leaderboard_df_long_doc[~leaderboard_df_long_doc[COL_NAME_IS_ANONYMOUS]][shown_columns_long_doc]
54
+ leaderboard_df_long_doc.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
55
 
56
 
57
  def update_metric_qa(
 
60
  langs: list,
61
  reranking_model: list,
62
  query: str,
63
+ show_anonymous: bool,
64
+ show_revision_and_timestamp,
65
  ):
66
+ return update_metric(raw_data, 'qa', metric, domains, langs, reranking_model, query, show_anonymous, show_revision_and_timestamp)
67
 
68
 
69
  def update_metric_long_doc(
 
72
  langs: list,
73
  reranking_model: list,
74
  query: str,
75
+ show_anonymous: bool,
76
+ show_revision_and_timestamp,
77
  ):
78
+ return update_metric(raw_data, "long-doc", metric, domains, langs, reranking_model, query, show_anonymous, show_revision_and_timestamp)
79
 
80
 
81
  def update_table_without_ranking(
 
84
  langs,
85
  reranking_query,
86
  query,
87
+ show_anonymous,
88
+ show_revision_and_timestamp,
89
  ):
90
+ return update_table(hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking=False, show_revision_and_timestamp=show_revision_and_timestamp)
91
 
92
 
93
  def update_table_without_ranking_long_doc(
 
96
  langs,
97
  reranking_query,
98
  query,
99
+ show_anonymous,
100
+ show_revision_and_timestamp,
101
  ):
102
+ return update_table_long_doc(hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking=False, show_revision_and_timestamp=show_revision_and_timestamp)
103
 
104
 
105
  demo = gr.Blocks(css=custom_css)
 
163
  value=False,
164
  info="The anonymous submissions might have invalid model information."
165
  )
166
+ with gr.Row():
167
+ show_revision_and_timestamp = gr.Checkbox(
168
+ label="Show submission details",
169
+ value=False,
170
+ info="Show the revision and timestamp information of submissions"
171
+ )
172
 
173
  leaderboard_table = gr.components.Dataframe(
174
  value=leaderboard_df_qa,
 
199
  leaderboard_table,
200
  )
201
 
202
+ for selector in [show_revision_and_timestamp, selected_rerankings]:
203
+ selector.change(
204
+ update_table_without_ranking,
205
+ [
206
+ hidden_leaderboard_table_for_search,
207
+ selected_domains,
208
+ selected_langs,
209
+ selected_rerankings,
210
+ search_bar,
211
+ show_anonymous,
212
+ show_revision_and_timestamp
213
+ ],
214
+ leaderboard_table,
215
+ queue=True
216
+ )
217
 
218
  # Set column-wise listener
219
  for selector in [
 
303
  value=False,
304
  info="The anonymous submissions might have invalid model information."
305
  )
306
+ with gr.Row():
307
+ show_revision_and_timestamp = gr.Checkbox(
308
+ label="Show submission details",
309
+ value=False,
310
+ info="Show the revision and timestamp information of submissions"
311
+ )
312
 
313
  leaderboard_table_long_doc = gr.components.Dataframe(
314
  value=leaderboard_df_long_doc,
 
335
  selected_rerankings,
336
  search_bar,
337
  show_anonymous,
338
+ show_revision_and_timestamp
339
  ],
340
  leaderboard_table_long_doc,
341
  )
342
 
343
+ for selector in [show_revision_and_timestamp, selected_rerankings]:
344
+ selector.change(
345
+ update_table_without_ranking_long_doc,
346
+ [
347
+ hidden_leaderboard_table_for_search,
348
+ selected_domains,
349
+ selected_langs,
350
+ selected_rerankings,
351
+ search_bar,
352
+ show_anonymous,
353
+ show_revision_and_timestamp
354
+ ],
355
+ leaderboard_table_long_doc,
356
+ queue=True,
357
+ )
358
 
359
  # Set column-wise listener
360
  for selector in [
 
369
  selected_rerankings,
370
  search_bar,
371
  show_anonymous,
372
+ show_revision_and_timestamp
373
  ],
374
  leaderboard_table_long_doc,
375
  queue=True,
 
385
  selected_rerankings,
386
  search_bar,
387
  show_anonymous,
388
+ show_revision_and_timestamp
389
  ],
390
  leaderboard_table_long_doc,
391
  queue=True
src/utils.py CHANGED
@@ -9,7 +9,7 @@ import pandas as pd
9
  from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, BenchmarksQA, BenchmarksLongDoc
10
  from src.display.formatting import styled_message, styled_error
11
  from src.display.utils import COLS_QA, TYPES_QA, COLS_LONG_DOC, TYPES_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, \
12
- COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, COL_NAME_IS_ANONYMOUS, get_default_auto_eval_column_dict
13
  from src.envs import API, SEARCH_RESULTS_REPO
14
  from src.read_evals import FullEvalResult, get_leaderboard_df, calculate_mean
15
 
@@ -130,14 +130,18 @@ def _update_table(
130
  reranking_query: list,
131
  query: str,
132
  show_anonymous: bool,
133
- reset_ranking: bool = True
 
134
  ):
135
  filtered_df = hidden_df.copy()
136
  if not show_anonymous:
137
  filtered_df = filtered_df[~filtered_df[COL_NAME_IS_ANONYMOUS]]
138
  filtered_df = filter_models(filtered_df, reranking_query)
139
  filtered_df = filter_queries(query, filtered_df)
140
- return select_columns(filtered_df, domains, langs, task, reset_ranking)
 
 
 
141
 
142
 
143
  def update_table(
@@ -147,10 +151,11 @@ def update_table(
147
  reranking_query: list,
148
  query: str,
149
  show_anonymous: bool,
150
- reset_ranking: bool = True
 
151
  ):
152
  return _update_table(
153
- "qa", hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking)
154
 
155
 
156
  def update_table_long_doc(
@@ -160,10 +165,11 @@ def update_table_long_doc(
160
  reranking_query: list,
161
  query: str,
162
  show_anonymous: bool,
163
- reset_ranking: bool = True
 
164
  ):
165
  return _update_table(
166
- "long-doc", hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking)
167
 
168
 
169
  def update_metric(
@@ -174,7 +180,8 @@ def update_metric(
174
  langs: list,
175
  reranking_model: list,
176
  query: str,
177
- show_anonymous: bool = False
 
178
  ) -> pd.DataFrame:
179
  if task == 'qa':
180
  leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
@@ -184,7 +191,8 @@ def update_metric(
184
  langs,
185
  reranking_model,
186
  query,
187
- show_anonymous
 
188
  )
189
  elif task == "long-doc":
190
  leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
@@ -194,7 +202,8 @@ def update_metric(
194
  langs,
195
  reranking_model,
196
  query,
197
- show_anonymous
 
198
  )
199
 
200
 
 
9
  from src.benchmarks import BENCHMARK_COLS_QA, BENCHMARK_COLS_LONG_DOC, BenchmarksQA, BenchmarksLongDoc
10
  from src.display.formatting import styled_message, styled_error
11
  from src.display.utils import COLS_QA, TYPES_QA, COLS_LONG_DOC, TYPES_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, \
12
+ COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, COL_NAME_IS_ANONYMOUS, COL_NAME_TIMESTAMP, COL_NAME_REVISION, get_default_auto_eval_column_dict
13
  from src.envs import API, SEARCH_RESULTS_REPO
14
  from src.read_evals import FullEvalResult, get_leaderboard_df, calculate_mean
15
 
 
130
  reranking_query: list,
131
  query: str,
132
  show_anonymous: bool,
133
+ reset_ranking: bool = True,
134
+ show_revision_and_timestamp: bool = False
135
  ):
136
  filtered_df = hidden_df.copy()
137
  if not show_anonymous:
138
  filtered_df = filtered_df[~filtered_df[COL_NAME_IS_ANONYMOUS]]
139
  filtered_df = filter_models(filtered_df, reranking_query)
140
  filtered_df = filter_queries(query, filtered_df)
141
+ filtered_df = select_columns(filtered_df, domains, langs, task, reset_ranking)
142
+ if not show_revision_and_timestamp:
143
+ filtered_df.drop([COL_NAME_REVISION, COL_NAME_TIMESTAMP], axis=1, inplace=True)
144
+ return filtered_df
145
 
146
 
147
  def update_table(
 
151
  reranking_query: list,
152
  query: str,
153
  show_anonymous: bool,
154
+ reset_ranking: bool = True,
155
+ show_revision_and_timestamp: bool = False
156
  ):
157
  return _update_table(
158
+ "qa", hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking, show_revision_and_timestamp)
159
 
160
 
161
  def update_table_long_doc(
 
165
  reranking_query: list,
166
  query: str,
167
  show_anonymous: bool,
168
+ reset_ranking: bool = True,
169
+ show_revision_and_timestamp: bool = False
170
  ):
171
  return _update_table(
172
+ "long-doc", hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking, show_revision_and_timestamp)
173
 
174
 
175
  def update_metric(
 
180
  langs: list,
181
  reranking_model: list,
182
  query: str,
183
+ show_anonymous: bool = False,
184
+ show_revision_and_timestamp: bool = False,
185
  ) -> pd.DataFrame:
186
  if task == 'qa':
187
  leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
 
191
  langs,
192
  reranking_model,
193
  query,
194
+ show_anonymous,
195
+ show_revision_and_timestamp
196
  )
197
  elif task == "long-doc":
198
  leaderboard_df = get_leaderboard_df(raw_data, task=task, metric=metric)
 
202
  langs,
203
  reranking_model,
204
  query,
205
+ show_anonymous,
206
+ show_revision_and_timestamp
207
  )
208
 
209