yentinglin committed
Commit: be645c2 • 1 Parent(s): 9fc574b
add
src/leaderboard/read_evals.py  CHANGED
@@ -44,7 +44,7 @@ class EvalResult:
         precision = Precision.from_str(config.get("model_dtype"))
 
         # Get model and org
-        org_and_model = config.get("model_name", config.get("model_args", None))
+        org_and_model = data.get("model_name", config.get("model_name", config.get("model_args", None)))
         org_and_model = org_and_model.split("/", 1)
 
         if len(org_and_model) == 1:
@@ -66,7 +66,7 @@ class EvalResult:
         if architectures:
             architecture = ";".join(architectures)
 
-        print(data["results"])
+        # print(data["results"])
         # Extract results available in this file (some results are split in several files)
         results = {}
         for task in Tasks:
@@ -74,7 +74,7 @@ class EvalResult:
 
             # We average all scores of a given metric (not all metrics are present in all files)
            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
-            print(f"{task}: {accs}")
+            # print(f"{task}: {accs}")
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
 
@@ -177,12 +177,12 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
-        print(f"Model result filepath: {model_result_filepath}")
+        # print(f"Model result filepath: {model_result_filepath}")
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        print(eval_result.results)
+        # print(eval_result.results)
         # print(eval_result)
         eval_result.update_with_request_file(requests_path)
-        print(eval_result.results)
+        # print(eval_result.results)
 
         # Store results of same eval together
         eval_name = eval_result.eval_name
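The only behavioral change in this commit is the model-name lookup: the new line prefers a top-level "model_name" field in the result file and only then falls back to the request config, while the remaining edits just comment out debug prints. A minimal sketch of that fallback chain, using hypothetical result/config dicts (not taken from the repository), behaves as follows:

# Sketch of the fallback chain introduced above; the example dicts are hypothetical.
data = {"model_name": "someorg/some-model-7b", "results": {}}                      # parsed result JSON
config = {"model_dtype": "bfloat16", "model_args": "pretrained=someorg/some-model-7b"}

# Prefer data["model_name"], then config["model_name"], then config["model_args"].
# Note: dict.get only falls through when the key is absent, not when its value is None.
org_and_model = data.get("model_name", config.get("model_name", config.get("model_args", None)))

parts = org_and_model.split("/", 1)
org, model = ("", parts[0]) if len(parts) == 1 else (parts[0], parts[1])
print(org, model)  # -> someorg some-model-7b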