Refactor evaluation logic
evaluation.py CHANGED (+14 -15)
@@ -1,3 +1,4 @@
+import copy
 from dataclasses import dataclass
 
 import streamlit as st
@@ -5,7 +6,7 @@ from huggingface_hub import DatasetFilter, HfApi
 from huggingface_hub.hf_api import DatasetInfo
 
 
-@dataclass(frozen=True, eq=True)
+@dataclass(frozen=True, eq=True, unsafe_hash=True)
 class EvaluationInfo:
     task: str
     model: str
@@ -15,30 +16,29 @@ class EvaluationInfo:
     metrics: set
 
 
-def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
+def create_evaluation_info(dataset_info: DatasetInfo) -> int:
     if dataset_info.cardData is not None:
         metadata = dataset_info.cardData["eval_info"]
         metadata.pop("col_mapping", None)
         # TODO(lewtun): populate dataset cards with metric info
         if "metrics" not in metadata:
             metadata["metrics"] = frozenset()
-
-        evaluation_info = EvaluationInfo(**metadata)
-        return hash(evaluation_info)
-    else:
-        return None
+        else:
+            metadata["metrics"] = frozenset(metadata["metrics"])
+        return EvaluationInfo(**metadata)
 
 
-def get_evaluation_ids():
+def get_evaluation_infos():
     filt = DatasetFilter(author="autoevaluate")
     evaluation_datasets = HfApi().list_datasets(filter=filt, full=True)
-    return [compute_evaluation_id(dset) for dset in evaluation_datasets]
+    return [create_evaluation_info(dset) for dset in evaluation_datasets]
 
 
 def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split, metrics):
-    evaluation_ids = get_evaluation_ids()
+    evaluation_infos = get_evaluation_infos()
+    models_to_filter = copy.copy(models)
 
-    for idx, model in enumerate(models):
+    for model in models_to_filter:
         evaluation_info = EvaluationInfo(
             task=task,
             model=model,
@@ -47,12 +47,11 @@ def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split, metrics):
             dataset_split=dataset_split,
             metrics=frozenset(metrics),
         )
-        candidate_id = hash(evaluation_info)
-        if candidate_id in evaluation_ids:
+        if evaluation_info in evaluation_infos:
             st.info(
-                f"Model `{model}` has already been evaluated on this configuration. \
+                f"Model [`{model}`](https://huggingface.co/{model}) has already been evaluated on this configuration. \
                 This model will be excluded from the evaluation job..."
            )
-            models.pop(idx)
+            models.remove(model)
 
     return models
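The switch to iterating over `copy.copy(models)` while calling `models.remove(model)` is the heart of the fix: the previous loop removed entries from `models` while iterating over that same list, and mutating a list during iteration silently skips the element that slides into the freed slot. A minimal sketch of the failure mode and the fix, using made-up list contents:

import copy

# Buggy: removing during iteration shifts later elements left,
# so the element right after each removal is never visited.
models = ["a", "b", "c", "d"]
for idx, model in enumerate(models):
    if model in {"a", "b"}:
        models.pop(idx)
print(models)  # ['b', 'c', 'd'] -- "b" matched but was skipped

# Fixed: iterate over a shallow copy so every element is visited,
# then remove matches from the original list.
models = ["a", "b", "c", "d"]
for model in copy.copy(models):
    if model in {"a", "b"}:
        models.remove(model)
print(models)  # ['c', 'd']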
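The containment check `evaluation_info in evaluation_infos` leans on the dataclass machinery: `frozen=True, eq=True` gives `EvaluationInfo` field-by-field equality, and storing `metrics` as a `frozenset` makes that comparison order-insensitive while keeping instances hashable (a mutable `set` field would make `hash()` fail). A small sketch of the behaviour being relied on, with the field list abbreviated and the values invented:

from dataclasses import dataclass

@dataclass(frozen=True, eq=True, unsafe_hash=True)
class EvaluationInfo:
    task: str
    model: str
    metrics: frozenset  # frozenset, not set, so instances stay hashable

a = EvaluationInfo("text-classification", "my-model", frozenset({"accuracy", "f1"}))
b = EvaluationInfo("text-classification", "my-model", frozenset({"f1", "accuracy"}))

assert a == b              # field-by-field equality; metric order is irrelevant
assert a in [b]            # the membership test used by filter_evaluated_models
assert hash(a) == hash(b)  # equal instances hash equally, so sets/dicts also work

Note that with `frozen=True` and `eq=True` the dataclass already generates a `__hash__`, so `unsafe_hash=True` is arguably redundant here; it does, however, make the hashing intent explicit.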