Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
•
13218df
1
Parent(s):
e034fec
update
Browse files- backend-cli.py +17 -12
backend-cli.py
CHANGED
@@ -122,7 +122,7 @@ def process_evaluation(task: Task, eval_request: EvalRequest) -> dict:
|
|
122 |
return results
|
123 |
|
124 |
|
125 |
-
def process_finished_requests(thr: int) -> bool:
|
126 |
sanity_checks()
|
127 |
|
128 |
current_finished_status = [FINISHED_STATUS, FAILED_STATUS]
|
@@ -155,7 +155,11 @@ def process_finished_requests(thr: int) -> bool:
|
|
155 |
for task in task_lst:
|
156 |
task_name = task.benchmark
|
157 |
|
158 |
-
|
|
|
|
|
|
|
|
|
159 |
eval_request: EvalRequest = result_name_to_request[result_name]
|
160 |
|
161 |
my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
|
@@ -171,7 +175,7 @@ def process_finished_requests(thr: int) -> bool:
|
|
171 |
return False
|
172 |
|
173 |
|
174 |
-
def maybe_refresh_results(thr: int) -> bool:
|
175 |
sanity_checks()
|
176 |
|
177 |
current_finished_status = [PENDING_STATUS, FINISHED_STATUS, FAILED_STATUS]
|
@@ -195,8 +199,6 @@ def maybe_refresh_results(thr: int) -> bool:
|
|
195 |
# Check the corresponding result
|
196 |
eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
|
197 |
|
198 |
-
# breakpoint()
|
199 |
-
|
200 |
task_lst = TASKS_HARNESS.copy()
|
201 |
random.shuffle(task_lst)
|
202 |
|
@@ -204,11 +206,12 @@ def maybe_refresh_results(thr: int) -> bool:
|
|
204 |
for task in task_lst:
|
205 |
task_name = task.benchmark
|
206 |
|
|
|
|
|
|
|
207 |
|
208 |
-
# task_lst = ['nq', 'trivia', 'tqa', 'self', 'xsum', 'cnn', 'memo']
|
209 |
task_lst = ['nq', 'trivia', 'tqa', 'self']
|
210 |
-
if (eval_result is None or
|
211 |
-
task_name not in eval_result.results or
|
212 |
any(ss in task_name for ss in task_lst)):
|
213 |
eval_request: EvalRequest = result_name_to_request[result_name]
|
214 |
|
@@ -262,9 +265,11 @@ def process_pending_requests() -> bool:
|
|
262 |
|
263 |
if __name__ == "__main__":
|
264 |
wait = True
|
|
|
265 |
|
266 |
if socket.gethostname() in {'hamburg', 'neuromancer'} or os.path.isdir("/home/pminervi"):
|
267 |
wait = False
|
|
|
268 |
|
269 |
if wait:
|
270 |
time.sleep(60 * random.randint(5, 10))
|
@@ -277,14 +282,14 @@ if __name__ == "__main__":
|
|
277 |
|
278 |
if res is False:
|
279 |
if random.randint(0, 1) == 0:
|
280 |
-
res = maybe_refresh_results(100)
|
281 |
else:
|
282 |
-
res = process_finished_requests(100)
|
283 |
|
284 |
time.sleep(60)
|
285 |
|
286 |
if res is False:
|
287 |
if random.randint(0, 1) == 0:
|
288 |
-
res = maybe_refresh_results(0)
|
289 |
else:
|
290 |
-
res = process_finished_requests(0)
|
|
|
122 |
return results
|
123 |
|
124 |
|
125 |
+
def process_finished_requests(thr: int, hard_task_lst: Optional[list[str]] = None) -> bool:
|
126 |
sanity_checks()
|
127 |
|
128 |
current_finished_status = [FINISHED_STATUS, FAILED_STATUS]
|
|
|
155 |
for task in task_lst:
|
156 |
task_name = task.benchmark
|
157 |
|
158 |
+
do_run_task = False
|
159 |
+
if hard_task_lst is None or any(ss in task_name for ss in hard_task_lst):
|
160 |
+
do_run_task = True
|
161 |
+
|
162 |
+
if (eval_result is None or task_name not in eval_result.results) and do_run_task:
|
163 |
eval_request: EvalRequest = result_name_to_request[result_name]
|
164 |
|
165 |
my_snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)
|
|
|
175 |
return False
|
176 |
|
177 |
|
178 |
+
def maybe_refresh_results(thr: int, hard_task_lst: Optional[list[str]] = None) -> bool:
|
179 |
sanity_checks()
|
180 |
|
181 |
current_finished_status = [PENDING_STATUS, FINISHED_STATUS, FAILED_STATUS]
|
|
|
199 |
# Check the corresponding result
|
200 |
eval_result: Optional[EvalResult] = result_name_to_result[result_name] if result_name in result_name_to_result else None
|
201 |
|
|
|
|
|
202 |
task_lst = TASKS_HARNESS.copy()
|
203 |
random.shuffle(task_lst)
|
204 |
|
|
|
206 |
for task in task_lst:
|
207 |
task_name = task.benchmark
|
208 |
|
209 |
+
do_run_task = False
|
210 |
+
if hard_task_lst is None or any(ss in task_name for ss in hard_task_lst):
|
211 |
+
do_run_task = True
|
212 |
|
|
|
213 |
task_lst = ['nq', 'trivia', 'tqa', 'self']
|
214 |
+
if (eval_result is None or do_run_task or task_name not in eval_result.results or
|
|
|
215 |
any(ss in task_name for ss in task_lst)):
|
216 |
eval_request: EvalRequest = result_name_to_request[result_name]
|
217 |
|
|
|
265 |
|
266 |
if __name__ == "__main__":
|
267 |
wait = True
|
268 |
+
hard_task_lst = None
|
269 |
|
270 |
if socket.gethostname() in {'hamburg', 'neuromancer'} or os.path.isdir("/home/pminervi"):
|
271 |
wait = False
|
272 |
+
hard_task_lst = ['nq', 'trivia', 'tqa']
|
273 |
|
274 |
if wait:
|
275 |
time.sleep(60 * random.randint(5, 10))
|
|
|
282 |
|
283 |
if res is False:
|
284 |
if random.randint(0, 1) == 0:
|
285 |
+
res = maybe_refresh_results(100, hard_task_lst=hard_task_lst)
|
286 |
else:
|
287 |
+
res = process_finished_requests(100, hard_task_lst=hard_task_lst)
|
288 |
|
289 |
time.sleep(60)
|
290 |
|
291 |
if res is False:
|
292 |
if random.randint(0, 1) == 0:
|
293 |
+
res = maybe_refresh_results(0, hard_task_lst=hard_task_lst)
|
294 |
else:
|
295 |
+
res = process_finished_requests(0, hard_task_lst=hard_task_lst)
|