taxfree_python
Add functions to submit models
b29fd2d
raw
history blame
2.03 kB
import os
import re
import tempfile
from urllib.parse import urlparse
import joblib
import numpy as np
from datasets import load_dataset
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download
# Load variables from a .env file into the process environment before any
# configuration value is read below.
load_dotenv()

# Identifier of the evaluation dataset handed to `load_dataset`; this is
# None when the TEST_DATA_ID environment variable is not set.
TEST_DATA_ID = os.getenv("TEST_DATA_ID")
def relative_error_loss(predicted_age, true_age):
    """Return the mean absolute relative error between predictions and truth.

    Each element contributes ``|true - predicted| / true``. Where the true
    value is exactly 0 the denominator is replaced by 0.1 so the division
    stays finite.

    Args:
        predicted_age: Array-like of predicted values (list, tuple or
            ndarray of any shape).
        true_age: Array-like of reference values with the same number of
            elements as ``predicted_age``.

    Returns:
        The mean of the element-wise relative errors as a NumPy float.
    """
    # Coerce to flat float arrays. Flattening matters: an (n, 1) prediction
    # column combined with an (n,) target would otherwise broadcast into an
    # (n, n) matrix and silently produce a wrong mean. Coercion also lets
    # plain Python lists be passed in.
    predicted = np.asarray(predicted_age, dtype=float).ravel()
    actual = np.asarray(true_age, dtype=float).ravel()

    # Guard against division by zero for samples whose true value is 0.
    safe_denominator = np.where(actual == 0, 0.1, actual)
    relative_error = np.abs((actual - predicted) / safe_denominator)
    return np.mean(relative_error)
def parse_model_url(model_url: str):
    """Split a Hub file URL into ``(repo_id, revision, filename)``.

    The URL path is expected to look like
    ``/<owner>/<repo>/<resolve|blob>/<revision>/<path/to/file.joblib>``.
    Query strings and fragments are ignored because only the path component
    of the parsed URL is inspected.

    Args:
        model_url: Full URL of the model file.

    Returns:
        A tuple ``(repo_id, revision, filename)`` where ``repo_id`` is
        ``"<owner>/<repo>"`` and ``filename`` is the file's path inside the
        repository (including any sub-folders).

    Raises:
        ValueError: If the path has fewer than five segments or the file
            does not end in ``.joblib``.
    """
    parsed = urlparse(model_url)
    path_parts = parsed.path.strip("/").split("/")
    if len(path_parts) < 5:
        raise ValueError("Unexpected URL format. Make sure it's a Hub URL with /resolve/main/ or /blob/main/")

    repo_id = "/".join(path_parts[:2])
    # path_parts[2] is the "resolve"/"blob" segment; the revision follows it.
    revision = path_parts[3]
    # Everything after the revision is the file's path within the repo.
    # Taking only the first segment would truncate files stored in
    # sub-folders (e.g. "models/v1/model.joblib" -> "models").
    filename = "/".join(path_parts[4:])
    if not filename.endswith(".joblib"):
        raise ValueError("The file must be a .joblib file.")
    return repo_id, revision, filename
def evaluate_model(model_url: str) -> float:
    """Download a ``.joblib`` model from the Hub and score it on the test set.

    The test features are read from the ``"main"`` configuration of the
    dataset named by ``TEST_DATA_ID`` (``"test"`` split, ``SampleID`` column
    dropped, cast to ``float32``); the targets are the ``Age`` column of the
    ``"meta"`` configuration's ``"test"`` split.
    NOTE(review): rows of the two configurations are assumed to be in the
    same order -- no join on ``SampleID`` is performed; confirm upstream.

    Args:
        model_url: ``https://huggingface.co/...`` URL of the model file.

    Returns:
        The mean relative error of the model's predictions (lower is better).

    Raises:
        ValueError: If the URL is not a valid Hub ``.joblib`` URL, the model
            file cannot be loaded, or the number of predictions does not
            match the number of test samples.
        RuntimeError: If ``TEST_DATA_ID`` is not configured.
    """
    if not model_url.startswith("https://huggingface.co/"):
        raise ValueError("Invalid model URL. Must start with https://huggingface.co/")
    repo_id, revision, filename = parse_model_url(model_url)

    # Fail with a clear message instead of an opaque error from inside
    # `load_dataset` when the environment is not configured.
    if not TEST_DATA_ID:
        raise RuntimeError("TEST_DATA_ID is not set; configure it in the environment or .env file.")

    ds_test_meta = load_dataset(TEST_DATA_ID, "meta")
    ds_test_main = load_dataset(TEST_DATA_ID, "main")
    X_test = ds_test_main["test"].to_pandas().drop(columns=["SampleID"])
    X_test = X_test.values.astype(np.float32)
    y_test = np.array(ds_test_meta["test"]["Age"])

    # Download into a throwaway directory so submitted files do not
    # accumulate on disk; the model is fully loaded before the directory
    # is removed.
    with tempfile.TemporaryDirectory() as tmpdir:
        local_model_path = hf_hub_download(repo_id=repo_id, filename=filename, revision=revision, cache_dir=tmpdir)
        try:
            # SECURITY: joblib.load unpickles the file, which can execute
            # arbitrary code. The file comes from a user-supplied URL, so
            # this should only run in a sandboxed / trusted context.
            model = joblib.load(local_model_path)
        except Exception as e:
            # Keep the original exception as the cause for debugging.
            raise ValueError(f"Failed to load the model. Please check the .joblib file. Error: {e}") from e

    # Flatten so that an (n, 1) output is compared element-wise with the
    # (n,) targets rather than being broadcast against them.
    predicted_age = np.asarray(model.predict(X_test)).ravel()
    if predicted_age.size != y_test.size:
        raise ValueError(
            f"The model returned {predicted_age.size} predictions for {y_test.size} test samples."
        )

    score = relative_error_loss(predicted_age, y_test)
    return float(score)