Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
•
4a18d8a
1
Parent(s):
9fbeaa1
update
Browse files
src/leaderboard/read_evals.py
CHANGED
@@ -89,10 +89,18 @@ class EvalResult:
|
|
89 |
|
90 |
def post_process_results(results: dict) -> dict:
    """Add comma-stripped aliases for result keys, in place.

    Every key that contains a comma gets an alias consisting of the text
    before its first comma. The alias maps to the same value object as the
    key it derives from, and the original key is kept. If an alias equals
    an existing key, that entry is overwritten.

    Args:
        results: Dictionary whose keys are expected to be strings. It is
            mutated in place.

    Returns:
        The same ``results`` object that was passed in.
    """
    # Iterate over a snapshot because aliases are inserted into ``results``
    # while looping.
    for key, value in list(results.items()):
        if "," in key:
            # Only the leading token is needed, so avoid splitting the
            # whole key.
            results[key.partition(",")[0]] = value
    return results
|
97 |
|
98 |
accs = np.array([v.get(task.metric, None) for k, v in post_process_results(data["results"]).items() if task.benchmark in k])
|
|
|
89 |
|
90 |
def post_process_results(results: dict) -> dict:
    """Add normalized aliases for result keys, in place.

    Two passes are made over ``results``:

    1. Every key containing ``"exact_match"`` gets an alias in which each
       occurrence of that substring is replaced by ``"em"``.
    2. Every key containing a comma (including aliases added by the first
       pass) gets an alias consisting of the text before its first comma.

    Aliases map to the same value object as the key they derive from, and
    the original keys are kept. If an alias equals an existing key, that
    entry is overwritten.

    Args:
        results: Dictionary whose keys are expected to be strings. It is
            mutated in place.

    Returns:
        The same ``results`` object that was passed in.
    """
    # Iterate over a snapshot because aliases are inserted into ``results``
    # while looping.
    for key, value in list(results.items()):
        if "exact_match" in key:
            results[key.replace("exact_match", "em")] = value

    # Take a fresh snapshot so the "em" aliases added above are also
    # comma-stripped in this pass.
    for key, value in list(results.items()):
        if "," in key:
            # Only the leading token is needed, so avoid splitting the
            # whole key.
            results[key.partition(",")[0]] = value

    return results
|
105 |
|
106 |
accs = np.array([v.get(task.metric, None) for k, v in post_process_results(data["results"]).items() if task.benchmark in k])
|