pminervini committed on
Commit
4a18d8a
1 Parent(s): 9fbeaa1
Files changed (1) hide show
  1. src/leaderboard/read_evals.py +8 -0
src/leaderboard/read_evals.py CHANGED
@@ -89,10 +89,18 @@ class EvalResult:
89
 
90
  def post_process_results(results: dict) -> dict:
91
  res_copy = results.copy()
 
 
 
 
 
 
 
92
  for k, v in res_copy.items():
93
  if "," in k:
94
  tokens = k.split(",")
95
  results[tokens[0]] = v
 
96
  return results
97
 
98
  accs = np.array([v.get(task.metric, None) for k, v in post_process_results(data["results"]).items() if task.benchmark in k])
 
89
 
90
  def post_process_results(results: dict) -> dict:
91
  res_copy = results.copy()
92
+
93
+ for k, v in res_copy.items():
94
+ if "exact_match" in k:
95
+ results[k.replace("exact_match", "em")] = v
96
+
97
+ res_copy = results.copy()
98
+
99
  for k, v in res_copy.items():
100
  if "," in k:
101
  tokens = k.split(",")
102
  results[tokens[0]] = v
103
+
104
  return results
105
 
106
  accs = np.array([v.get(task.metric, None) for k, v in post_process_results(data["results"]).items() if task.benchmark in k])