Add eval caching
Files changed:
- app.py +20 -0
- evaluation.py +26 -0
- utils.py +1 -1
app.py CHANGED
@@ -8,6 +8,8 @@ from datasets import get_dataset_config_names
 from dotenv import load_dotenv
 from huggingface_hub import list_datasets
 
+from evaluation import (EvaluationInfo, compute_evaluation_id,
+                        get_evaluation_ids)
 from utils import (get_compatible_models, get_key, get_metadata, http_get,
                    http_post)
 
@@ -244,6 +246,24 @@ with st.form(key="form"):
 
     selected_models = st.multiselect("Select the models you wish to evaluate", compatible_models)
     print("Selected models:", selected_models)
+
+    evaluation_ids = get_evaluation_ids()
+
+    for idx, model in enumerate(selected_models):
+        eval_info = EvaluationInfo(
+            task=selected_task,
+            model=model,
+            dataset_name=selected_dataset,
+            dataset_config=selected_config,
+            dataset_split=selected_split,
+        )
+        candidate_id = hash(eval_info)
+        if candidate_id in evaluation_ids:
+            st.info(f"Model {model} has already been evaluated on this configuration. Skipping ...")
+            selected_models.pop(idx)
+
+    print("Selected models:", selected_models)
+
     submit_button = st.form_submit_button("Make submission")
 
     if submit_button:
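To make the new caching flow in app.py easier to follow, here is a minimal, self-contained sketch of the same check outside Streamlit: build an EvaluationInfo for each candidate model and keep only those whose hash is not already among the cached evaluation ids. The hard-coded example values and the filter-style rewrite (instead of popping from the list while iterating, as the hunk above does) are illustrative assumptions, not part of the commit.

from evaluation import EvaluationInfo, get_evaluation_ids

# Illustrative values; in the app these come from the Streamlit widgets.
selected_task = "text-classification"
selected_dataset = "imdb"
selected_config = "plain_text"
selected_split = "test"
selected_models = ["distilbert-base-uncased", "bert-base-uncased"]

# Hashes of every evaluation already stored under the autoevaluate org.
evaluation_ids = set(get_evaluation_ids())

def is_already_evaluated(model: str) -> bool:
    """True if this (task, model, dataset, config, split) combination is cached."""
    eval_info = EvaluationInfo(
        task=selected_task,
        model=model,
        dataset_name=selected_dataset,
        dataset_config=selected_config,
        dataset_split=selected_split,
    )
    return hash(eval_info) in evaluation_ids

# Keep only the models that still need an evaluation run.
models_to_submit = [m for m in selected_models if not is_already_evaluated(m)]
print("Models to submit:", models_to_submit)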
evaluation.py ADDED
@@ -0,0 +1,26 @@
+from dataclasses import dataclass
+
+from huggingface_hub import DatasetFilter, HfApi
+from huggingface_hub.hf_api import DatasetInfo
+
+
+@dataclass(frozen=True, eq=True)
+class EvaluationInfo:
+    task: str
+    model: str
+    dataset_name: str
+    dataset_config: str
+    dataset_split: str
+
+
+def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
+    metadata = dataset_info.cardData["eval_info"]
+    metadata.pop("col_mapping", None)
+    evaluation_info = EvaluationInfo(**metadata)
+    return hash(evaluation_info)
+
+
+def get_evaluation_ids():
+    filt = DatasetFilter(author="autoevaluate")
+    evaluation_datasets = HfApi().list_datasets(filter=filt, full=True)
+    return [compute_evaluation_id(dset) for dset in evaluation_datasets]
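The dataclass is declared with frozen=True and eq=True so that EvaluationInfo instances are hashable and compare by field values; that is what lets a hash serve as a cache key for an evaluation configuration. A small sketch, with example values that are assumptions for illustration only:

from evaluation import EvaluationInfo

a = EvaluationInfo(
    task="text-classification",
    model="distilbert-base-uncased",
    dataset_name="imdb",
    dataset_config="plain_text",
    dataset_split="test",
)
b = EvaluationInfo(
    task="text-classification",
    model="distilbert-base-uncased",
    dataset_name="imdb",
    dataset_config="plain_text",
    dataset_split="test",
)

# Frozen, eq-enabled dataclasses hash and compare by field values, so two
# submissions of the same configuration collapse to the same evaluation id.
assert a == b
assert hash(a) == hash(b)

Note that compute_evaluation_id expects the dataset card's eval_info metadata to contain exactly the five fields above (any col_mapping entry is dropped before the dataclass is built).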
utils.py CHANGED
@@ -1,7 +1,7 @@
 from typing import Dict, Union
 
 import requests
-from huggingface_hub import
+from huggingface_hub import HfApi, ModelFilter
 
 AUTOTRAIN_TASK_TO_HUB_TASK = {
     "binary_classification": "text-classification",