Refactor
- app.py +94 -93
- evaluation.py +1 -1
app.py
CHANGED
@@ -446,9 +446,9 @@ with st.form(key="form"):
     elif len(selected_models) == 0:
         st.warning("⚠️ No models were selected for evaluation! Please select at least one model and try again.")
     elif len(selected_models) > 10:
-        st.warning("Only 10 models can be evaluated at once. Please select fewer models
+        st.warning("Only 10 models can be evaluated at once. Please select fewer models and try again.")
     else:
-        # Filter out
+        # Filter out previously evaluated models
         selected_models = filter_evaluated_models(
             selected_models,
             selected_task,
@@ -458,102 +458,103 @@ with st.form(key="form"):
             selected_metrics,
         )
         print("INFO -- Selected models after filter:", selected_models)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if len(selected_models) > 0:
+            project_id = str(uuid.uuid4())[:8]
+            project_payload = {
+                "username": AUTOTRAIN_USERNAME,
+                "proj_name": f"eval-project-{project_id}",
+                "task": TASK_TO_ID[selected_task],
+                "config": {
+                    "language": AUTOTRAIN_TASK_TO_LANG[selected_task]
+                    if selected_task in AUTOTRAIN_TASK_TO_LANG
+                    else "en",
+                    "max_models": 5,
+                    "instance": {
+                        "provider": "aws",
+                        "instance_type": "ml.g4dn.4xlarge",
+                        "max_runtime_seconds": 172800,
+                        "num_instances": 1,
+                        "disk_size_gb": 150,
+                    },
+                    "evaluation": {
+                        "metrics": selected_metrics,
+                        "models": selected_models,
+                        "hf_username": hf_username,
+                    },
                 },
-                "evaluation": {"metrics": selected_metrics, "models": selected_models, "hf_username": hf_username},
-            },
-        }
-        print(f"INFO -- Payload: {project_payload}")
-        project_json_resp = http_post(
-            path="/projects/create",
-            payload=project_payload,
-            token=HF_TOKEN,
-            domain=AUTOTRAIN_BACKEND_API,
-        ).json()
-        print(f"INFO -- Project creation response: {project_json_resp}")
-
-        if project_json_resp["created"]:
-            data_payload = {
-                "split": 4, # use "auto" split choice in AutoTrain
-                "col_mapping": col_mapping,
-                "load_config": {"max_size_bytes": 0, "shuffle": False},
             }
-
-
-
+            print(f"INFO -- Payload: {project_payload}")
+            project_json_resp = http_post(
+                path="/projects/create",
+                payload=project_payload,
                 token=HF_TOKEN,
                 domain=AUTOTRAIN_BACKEND_API,
-                params={
-                    "type": "dataset",
-                    "config_name": selected_config,
-                    "split_name": selected_split,
-                },
             ).json()
-            print(f"INFO --
-
-
-
+            print(f"INFO -- Project creation response: {project_json_resp}")
+
+            if project_json_resp["created"]:
+                data_payload = {
+                    "split": 4, # use "auto" split choice in AutoTrain
+                    "col_mapping": col_mapping,
+                    "load_config": {"max_size_bytes": 0, "shuffle": False},
+                }
+                data_json_resp = http_post(
+                    path=f"/projects/{project_json_resp['id']}/data/{selected_dataset}",
+                    payload=data_payload,
                     token=HF_TOKEN,
                     domain=AUTOTRAIN_BACKEND_API,
+                    params={
+                        "type": "dataset",
+                        "config_name": selected_config,
+                        "split_name": selected_split,
+                    },
                 ).json()
-            print(f"INFO --
-            if
-
-            "
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            to
-
-            [
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                print(f"INFO -- Dataset creation response: {data_json_resp}")
+                if data_json_resp["download_status"] == 1:
+                    train_json_resp = http_get(
+                        path=f"/projects/{project_json_resp['id']}/data/start_process",
+                        token=HF_TOKEN,
+                        domain=AUTOTRAIN_BACKEND_API,
+                    ).json()
+                    print(f"INFO -- AutoTrain job response: {train_json_resp}")
+                    if train_json_resp["success"]:
+                        train_eval_index = {
+                            "train-eval-index": [
+                                {
+                                    "config": selected_config,
+                                    "task": AUTOTRAIN_TASK_TO_HUB_TASK[selected_task],
+                                    "task_id": selected_task,
+                                    "splits": {"eval_split": selected_split},
+                                    "col_mapping": col_mapping,
+                                }
+                            ]
+                        }
+                        selected_metadata = yaml.dump(train_eval_index, sort_keys=False)
+                        dataset_card_url = get_dataset_card_url(selected_dataset)
+                        st.success("✅ Successfully submitted evaluation job!")
+                        st.markdown(
+                            f"""
+                            Evaluation can take up to 1 hour to complete, so grab a ☕️ or 🍵 while you wait:
+
+                            * 🔔 A [Hub pull request](https://huggingface.co/docs/hub/repositories-pull-requests-discussions) with the evaluation results will be opened for each model you selected. Check your email for notifications.
+                            * 📊 Click [here](https://hf.co/spaces/autoevaluate/leaderboards?dataset={selected_dataset}) to view the results from your submission once the Hub pull request is merged.
+                            * 🥱 Tired of configuring evaluations? Add the following metadata to the [dataset card]({dataset_card_url}) to enable 1-click evaluations:
+                            """ # noqa
+                        )
+                        st.markdown(
+                            f"""
+                            ```yaml
+                            {selected_metadata}
+                            """
+                        )
+                        print("INFO -- Pushing evaluation job logs to the Hub")
+                        evaluation_log = {}
+                        evaluation_log["payload"] = project_payload
+                        evaluation_log["project_creation_response"] = project_json_resp
+                        evaluation_log["dataset_creation_response"] = data_json_resp
+                        evaluation_log["autotrain_job_response"] = train_json_resp
+                        commit_evaluation_log(evaluation_log, hf_access_token=HF_TOKEN)
+                    else:
+                        st.error("🙈 Oh no, there was an error submitting your evaluation job!")
+        else:
+            st.warning("⚠️ No models left to evaluate! Please select other models and try again.")
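For reference, the snippet below is a minimal sketch of the dataset-card metadata that the new `train_eval_index` block serializes with `yaml.dump(..., sort_keys=False)`. The dataset config, task, split, and column names are placeholders chosen for illustration, not values taken from this commit.

```python
import yaml

# Placeholder selections standing in for the app's selected_config, selected_task,
# selected_split and col_mapping variables.
train_eval_index = {
    "train-eval-index": [
        {
            "config": "default",
            "task": "text-classification",
            "task_id": "binary_classification",
            "splits": {"eval_split": "test"},
            "col_mapping": {"text": "text", "label": "target"},
        }
    ]
}

# sort_keys=False keeps the insertion order, matching the call in app.py.
print(yaml.dump(train_eval_index, sort_keys=False))
# train-eval-index:
# - config: default
#   task: text-classification
#   task_id: binary_classification
#   splits:
#     eval_split: test
#   col_mapping:
#     text: text
#     label: target
```

Pasting a block like this into a dataset card's YAML metadata is what the success message above refers to as enabling 1-click evaluations.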
evaluation.py
CHANGED
@@ -6,7 +6,7 @@ from huggingface_hub import DatasetFilter, HfApi
 from huggingface_hub.hf_api import DatasetInfo


-@dataclass(frozen=True, eq=True
+@dataclass(frozen=True, eq=True)
 class EvaluationInfo:
     task: str
     model: str
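The evaluation.py hunk restores the closing parenthesis on the `@dataclass` decorator, which the previous revision left unterminated. As a brief sketch of why `frozen=True, eq=True` matters here: together they make `EvaluationInfo` instances immutable and hashable, so they can be stored in sets and compared, which is presumably what `filter_evaluated_models` in app.py relies on to skip models that already have results. Only the fields visible in this hunk are assumed; the real class may define more.

```python
from dataclasses import dataclass


@dataclass(frozen=True, eq=True)
class EvaluationInfo:
    task: str
    model: str


# frozen=True + eq=True generate __eq__ and __hash__, so instances are
# immutable, comparable, and usable as set members.
already_evaluated = {EvaluationInfo(task="binary_classification", model="bert-base-uncased")}
candidate = EvaluationInfo(task="binary_classification", model="bert-base-uncased")
print(candidate in already_evaluated)  # True
```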