Clémentine committed on
Commit d350941
Parent: 35763fc

fix rounding

Files changed (2):
  1. app.py +3 -1
  2. src/auto_leaderboard/load_results.py +2 -4
app.py CHANGED
@@ -18,6 +18,8 @@ from src.assets.css_html_js import custom_css, get_window_url_params
 from src.utils_display import AutoEvalColumn, EvalQueueColumn, fields, styled_error, styled_warning, styled_message
 from src.init import get_all_requested_models, load_all_info_from_hub
 
+pd.set_option('display.precision', 1)
+
 # clone / pull the lmeh eval data
 H4_TOKEN = os.environ.get("H4_TOKEN", None)
 
@@ -91,7 +93,7 @@ def get_leaderboard_df():
 
     df = pd.DataFrame.from_records(all_data)
     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
-    df = df[COLS]
+    df = df[COLS].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
    df = df[has_no_nan_values(df, BENCHMARK_COLS)]
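
The app.py side of the fix moves rounding to presentation time: the DataFrame keeps the scores as computed, pd.set_option('display.precision', 1) only changes how they are printed, and .round(decimals=2) trims what is stored to two decimals. A minimal standalone sketch of that behaviour, using made-up toy columns rather than the real leaderboard schema:

import pandas as pd

# Toy scores standing in for leaderboard columns (hypothetical values).
df = pd.DataFrame({"Average": [64.2375, 58.91666], "ARC": [61.3456, 55.0012]})

# Display-time precision affects only how the DataFrame is printed, not what it stores.
pd.set_option("display.precision", 1)
print(df)                  # shows 64.2, 58.9, ... while df still holds the full floats

# .round(decimals=2) returns a new DataFrame whose values are rounded to 2 decimals.
rounded = df.round(decimals=2)
print(rounded.iloc[0, 0])  # 64.24 -- here the stored value itself is rounded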
src/auto_leaderboard/load_results.py CHANGED
@@ -44,9 +44,7 @@ class EvalResult:
         data_dict[AutoEvalColumn.model.name] = make_clickable_model(base_model)
         data_dict[AutoEvalColumn.dummy.name] = base_model
         data_dict[AutoEvalColumn.revision.name] = self.revision
-        data_dict[AutoEvalColumn.average.name] = round(
-            sum([v for k, v in self.results.items()]) / 4.0, 1
-        )
+        data_dict[AutoEvalColumn.average.name] = sum([v for k, v in self.results.items()]) / 4.0
 
         for benchmark in BENCHMARKS:
             if benchmark not in self.results.keys():
@@ -95,7 +93,7 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, list[dict]]:
         accs = np.array([v[metric] for k, v in data["results"].items() if benchmark in k])
         if accs.size == 0:
             continue
-        mean_acc = round(np.mean(accs) * 100.0, 1)
+        mean_acc = np.mean(accs) * 100.0
         eval_results.append(EvalResult(
             eval_name=result_key, org=org, model=model, revision=model_sha, results={benchmark: mean_acc}, #todo model_type=, weight_type=
         ))
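
The load_results.py side removes the intermediate round() calls, so per-benchmark scores and the average over the four benchmarks are kept at full precision and only rounded for display. Rounding each benchmark to one decimal before averaging can shift the average itself; a small illustration with hypothetical scores, not taken from any real evaluation:

# Hypothetical per-benchmark scores, as fractions in [0, 1].
raw = [0.2546, 0.3346, 0.4446, 0.5546]

# Old behaviour: round each benchmark to 1 decimal, then average.
rounded_first = sum(round(x * 100.0, 1) for x in raw) / 4.0   # ~39.75

# New behaviour: average full-precision scores, round only when displaying.
full_precision = sum(x * 100.0 for x in raw) / 4.0            # ~39.71

print(rounded_first, full_precision)  # the early rounding shifts the average by ~0.04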