Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
9400714
1 Parent(s): d491ab8

feat: add is_anonymous field

Browse files
Files changed (4) hide show
  1. app.py +2 -2
  2. src/display/utils.py +5 -0
  3. src/read_evals.py +6 -2
  4. src/utils.py +3 -3
app.py CHANGED
@@ -315,7 +315,7 @@ with demo:
315
  with gr.Row():
316
  file_output = gr.File()
317
  with gr.Row():
318
- submit_anonymous = gr.Checkbox(
319
  label="Nope. I want to submit anonymously 🥷",
320
  value=False,
321
  info="Do you want to shown on the leaderboard by default?")
@@ -336,7 +336,7 @@ with demo:
336
  model_name,
337
  model_url,
338
  benchmark_version,
339
- submit_anonymous
340
  ],
341
  submission_result,
342
  show_progress="hidden"
 
315
  with gr.Row():
316
  file_output = gr.File()
317
  with gr.Row():
318
+ is_anonymous = gr.Checkbox(
319
  label="Nope. I want to submit anonymously 🥷",
320
  value=False,
321
  info="Do you want to shown on the leaderboard by default?")
 
336
  model_name,
337
  model_url,
338
  benchmark_version,
339
+ is_anonymous
340
  ],
341
  submission_result,
342
  show_progress="hidden"
src/display/utils.py CHANGED
@@ -27,6 +27,7 @@ COL_NAME_RERANKING_MODEL_LINK = "Reranking Model LINK"
27
  COL_NAME_RANK = "Rank 🏆"
28
  COL_NAME_REVISION = "Revision"
29
  COL_NAME_TIMESTAMP = "Submission Date"
 
30
 
31
 
32
  def get_default_auto_eval_column_dict():
@@ -56,8 +57,12 @@ def get_default_auto_eval_column_dict():
56
  auto_eval_column_dict.append(
57
  ["reranking_model_link", ColumnContent, ColumnContent(COL_NAME_RERANKING_MODEL, "markdown", False, hidden=True, never_hidden=False)]
58
  )
 
 
 
59
  return auto_eval_column_dict
60
 
 
61
  def make_autoevalcolumn(cls_name="BenchmarksQA", benchmarks=BenchmarksQA):
62
  auto_eval_column_dict = get_default_auto_eval_column_dict()
63
  ## Leaderboard columns
 
27
  COL_NAME_RANK = "Rank 🏆"
28
  COL_NAME_REVISION = "Revision"
29
  COL_NAME_TIMESTAMP = "Submission Date"
30
+ COL_NAME_IS_ANONYMOUS = "Anonymous Submission"
31
 
32
 
33
  def get_default_auto_eval_column_dict():
 
57
  auto_eval_column_dict.append(
58
  ["reranking_model_link", ColumnContent, ColumnContent(COL_NAME_RERANKING_MODEL, "markdown", False, hidden=True, never_hidden=False)]
59
  )
60
+ auto_eval_column_dict.append(
61
+ ["is_anonymous", ColumnContent, ColumnContent(COL_NAME_IS_ANONYMOUS, "bool", False, hidden=True)]
62
+ )
63
  return auto_eval_column_dict
64
 
65
+
66
  def make_autoevalcolumn(cls_name="BenchmarksQA", benchmarks=BenchmarksQA):
67
  auto_eval_column_dict = get_default_auto_eval_column_dict()
68
  ## Leaderboard columns
src/read_evals.py CHANGED
@@ -40,6 +40,7 @@ class EvalResult:
40
  metric: str
41
  timestamp: str = "" # submission timestamp
42
  revision: str = ""
 
43
 
44
 
45
  @dataclass
@@ -55,6 +56,7 @@ class FullEvalResult:
55
  results: List[EvalResult] # results on all the EvalResults over different tasks and metrics.
56
  timestamp: str = ""
57
  revision: str = ""
 
58
 
59
  @classmethod
60
  def init_from_json_file(cls, json_filepath):
@@ -87,7 +89,8 @@ class FullEvalResult:
87
  task=config["task"],
88
  metric=config["metric"],
89
  timestamp=config.get("timestamp", "2024-05-12T12:24:02Z"),
90
- revision=config.get("revision", "3a2ba9dcad796a48a02ca1147557724e")
 
91
  )
92
  result_list.append(eval_result)
93
  return cls(
@@ -98,7 +101,8 @@ class FullEvalResult:
98
  reranking_model_link=reranking_model_link,
99
  results=result_list,
100
  timestamp=result_list[0].timestamp,
101
- revision=result_list[0].revision
 
102
  )
103
 
104
  def to_dict(self, task='qa', metric='ndcg_at_3') -> List:
 
40
  metric: str
41
  timestamp: str = "" # submission timestamp
42
  revision: str = ""
43
+ is_anonymous: bool = False
44
 
45
 
46
  @dataclass
 
56
  results: List[EvalResult] # results on all the EvalResults over different tasks and metrics.
57
  timestamp: str = ""
58
  revision: str = ""
59
+ is_anonymous: bool = False
60
 
61
  @classmethod
62
  def init_from_json_file(cls, json_filepath):
 
89
  task=config["task"],
90
  metric=config["metric"],
91
  timestamp=config.get("timestamp", "2024-05-12T12:24:02Z"),
92
+ revision=config.get("revision", "3a2ba9dcad796a48a02ca1147557724e"),
93
+ is_anonymous=config.get("is_anonymous", False)
94
  )
95
  result_list.append(eval_result)
96
  return cls(
 
101
  reranking_model_link=reranking_model_link,
102
  results=result_list,
103
  timestamp=result_list[0].timestamp,
104
+ revision=result_list[0].revision,
105
+ is_anonymous=result_list[0].is_anonymous
106
  )
107
 
108
  def to_dict(self, task='qa', metric='ndcg_at_3') -> List:
src/utils.py CHANGED
@@ -59,7 +59,7 @@ def get_default_cols(task: str, columns: list = [], add_fix_cols: bool = True) -
59
  for col_name, col_type in zip(cols_list, types_list):
60
  if col_name not in benchmark_list:
61
  continue
62
- if columns and col_name not in columns:
63
  continue
64
  cols.append(col_name)
65
  types.append(col_type)
@@ -178,7 +178,7 @@ def get_iso_format_timestamp():
178
  return iso_format_timestamp, filename_friendly_timestamp
179
 
180
 
181
- def submit_results(filepath: str, model: str, model_url: str, version: str = "AIR-Bench_24.04", anonymous=False):
182
  if not filepath.endswith(".zip"):
183
  return styled_error(f"file uploading aborted. wrong file type: {filepath}")
184
 
@@ -218,7 +218,7 @@ def submit_results(filepath: str, model: str, model_url: str, version: str = "AI
218
  "model_name": f"{model}",
219
  "model_url": f"{model_url}",
220
  "version": f"{version}",
221
- "anonymous": f"{anonymous}",
222
  "revision": f"{revision}",
223
  "timestamp": f"{timestamp_config}"
224
  }
 
59
  for col_name, col_type in zip(cols_list, types_list):
60
  if col_name not in benchmark_list:
61
  continue
62
+ if len(columns) > 0 and col_name not in columns:
63
  continue
64
  cols.append(col_name)
65
  types.append(col_type)
 
178
  return iso_format_timestamp, filename_friendly_timestamp
179
 
180
 
181
+ def submit_results(filepath: str, model: str, model_url: str, version: str = "AIR-Bench_24.04", is_anonymous=False):
182
  if not filepath.endswith(".zip"):
183
  return styled_error(f"file uploading aborted. wrong file type: {filepath}")
184
 
 
218
  "model_name": f"{model}",
219
  "model_url": f"{model_url}",
220
  "version": f"{version}",
221
+ "is_anonymous": f"{is_anonymous}",
222
  "revision": f"{revision}",
223
  "timestamp": f"{timestamp_config}"
224
  }