Spaces:
Runtime error
Runtime error
pminervini
committed on
Commit
·
a88d51c
1
Parent(s):
9bfc5f2
update
Browse files- backend-cli.py +5 -3
- src/leaderboard/read_evals.py +4 -3
- submit-cli.py +1 -1
backend-cli.py
CHANGED
@@ -3,6 +3,7 @@
|
|
3 |
import os
|
4 |
import json
|
5 |
|
|
|
6 |
from datetime import datetime
|
7 |
|
8 |
from huggingface_hub import snapshot_download
|
@@ -99,7 +100,6 @@ def process_finished_requests() -> bool:
|
|
99 |
# Sort the evals by priority (first submitted first run)
|
100 |
eval_requests: list[EvalRequest] = sort_models_by_priority(api=API, models=eval_requests)
|
101 |
|
102 |
-
import random
|
103 |
random.shuffle(eval_requests)
|
104 |
|
105 |
from src.leaderboard.read_evals import get_raw_eval_results
|
@@ -115,8 +115,11 @@ def process_finished_requests() -> bool:
|
|
115 |
from typing import Optional
|
116 |
eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
|
117 |
|
|
|
|
|
|
|
118 |
# Iterate over tasks and, if we do not have results for a task, run the relevant evaluations
|
119 |
-
for task in
|
120 |
task_name = task.benchmark
|
121 |
|
122 |
if eval_result is None or task_name not in eval_result.results:
|
@@ -145,7 +148,6 @@ def process_pending_requests() -> bool:
|
|
145 |
# Sort the evals by priority (first submitted first run)
|
146 |
eval_requests = sort_models_by_priority(api=API, models=eval_requests)
|
147 |
|
148 |
-
import random
|
149 |
random.shuffle(eval_requests)
|
150 |
|
151 |
print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
|
|
|
3 |
import os
|
4 |
import json
|
5 |
|
6 |
+
import random
|
7 |
from datetime import datetime
|
8 |
|
9 |
from huggingface_hub import snapshot_download
|
|
|
100 |
# Sort the evals by priority (first submitted first run)
|
101 |
eval_requests: list[EvalRequest] = sort_models_by_priority(api=API, models=eval_requests)
|
102 |
|
|
|
103 |
random.shuffle(eval_requests)
|
104 |
|
105 |
from src.leaderboard.read_evals import get_raw_eval_results
|
|
|
115 |
from typing import Optional
|
116 |
eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
|
117 |
|
118 |
+
task_lst = TASKS_HARNESS.copy()
|
119 |
+
random.shuffle(task_lst)
|
120 |
+
|
121 |
# Iterate over tasks and, if we do not have results for a task, run the relevant evaluations
|
122 |
+
for task in task_lst:
|
123 |
task_name = task.benchmark
|
124 |
|
125 |
if eval_result is None or task_name not in eval_result.results:
|
|
|
148 |
# Sort the evals by priority (first submitted first run)
|
149 |
eval_requests = sort_models_by_priority(api=API, models=eval_requests)
|
150 |
|
|
|
151 |
random.shuffle(eval_requests)
|
152 |
|
153 |
print(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
|
src/leaderboard/read_evals.py
CHANGED
@@ -133,7 +133,7 @@ class EvalResult:
|
|
133 |
self.num_params = request.get("params", 0)
|
134 |
self.date = request.get("submitted_time", "")
|
135 |
except Exception:
|
136 |
-
print(f"Could not find request file for {self.org}/{self.model}")
|
137 |
|
138 |
def is_complete(self) -> bool:
|
139 |
for task in Tasks:
|
@@ -169,7 +169,7 @@ class EvalResult:
|
|
169 |
|
170 |
|
171 |
def get_request_file_for_model(requests_path, model_name, precision):
|
172 |
-
"""Selects the correct request file for a given model. Only keeps runs tagged as FINISHED"""
|
173 |
request_files = os.path.join(
|
174 |
requests_path,
|
175 |
f"{model_name}_eval_request_*.json",
|
@@ -179,11 +179,12 @@ def get_request_file_for_model(requests_path, model_name, precision):
|
|
179 |
# Select correct request file (precision)
|
180 |
request_file = ""
|
181 |
request_files = sorted(request_files, reverse=True)
|
|
|
182 |
for tmp_request_file in request_files:
|
183 |
with open(tmp_request_file, "r") as f:
|
184 |
req_content = json.load(f)
|
185 |
if (
|
186 |
-
req_content["status"] in ["FINISHED"]
|
187 |
and req_content["precision"] == precision.split(".")[-1]
|
188 |
):
|
189 |
request_file = tmp_request_file
|
|
|
133 |
self.num_params = request.get("params", 0)
|
134 |
self.date = request.get("submitted_time", "")
|
135 |
except Exception:
|
136 |
+
print(f"Could not find request file for {self.org}/{self.model} -- path: {requests_path}")
|
137 |
|
138 |
def is_complete(self) -> bool:
|
139 |
for task in Tasks:
|
|
|
169 |
|
170 |
|
171 |
def get_request_file_for_model(requests_path, model_name, precision):
|
172 |
+
"""Selects the correct request file for a given model. Only keeps runs tagged as FINISHED and RUNNING"""
|
173 |
request_files = os.path.join(
|
174 |
requests_path,
|
175 |
f"{model_name}_eval_request_*.json",
|
|
|
179 |
# Select correct request file (precision)
|
180 |
request_file = ""
|
181 |
request_files = sorted(request_files, reverse=True)
|
182 |
+
# print('XXX', request_files)
|
183 |
for tmp_request_file in request_files:
|
184 |
with open(tmp_request_file, "r") as f:
|
185 |
req_content = json.load(f)
|
186 |
if (
|
187 |
+
req_content["status"] in ["FINISHED", "RUNNING"]
|
188 |
and req_content["precision"] == precision.split(".")[-1]
|
189 |
):
|
190 |
request_file = tmp_request_file
|
submit-cli.py
CHANGED
@@ -118,7 +118,7 @@ def main():
|
|
118 |
|
119 |
filtered_model_lst = sorted([m for m in model_lst if custom_filter(m)], key=lambda m: m.downloads, reverse=True)
|
120 |
|
121 |
-
for i in range(min(
|
122 |
model = filtered_model_lst[i]
|
123 |
|
124 |
print(f'Considering {model.id} ..')
|
|
|
118 |
|
119 |
filtered_model_lst = sorted([m for m in model_lst if custom_filter(m)], key=lambda m: m.downloads, reverse=True)
|
120 |
|
121 |
+
for i in range(min(200, len(filtered_model_lst))):
|
122 |
model = filtered_model_lst[i]
|
123 |
|
124 |
print(f'Considering {model.id} ..')
|