import json
import os
import logging
from argparse import Namespace
from datetime import datetime

from lighteval.main_accelerate import main, EnvConfig, create_model_config, load_model
from lighteval.models.endpoint_model import InferenceEndpointModel

from src.envs import RESULTS_REPO, CACHE_PATH, TOKEN
from src.backend.manage_requests import EvalRequest

# Monkey-patch: turn InferenceEndpointModel.add_special_tokens into a no-op so it is
# skipped during evaluation.
InferenceEndpointModel.add_special_tokens = lambda *args, **kwargs: None

logging.getLogger("openai").setLevel(logging.WARNING)
class DefaultNamespace(Namespace):
    """Namespace that returns None for missing attributes instead of raising."""

    def __getattr__(self, name):
        return self.__dict__.get(name, None)
def run_evaluation(
    eval_request: EvalRequest,
    task_names: str,
    batch_size: int,
    local_dir: str,
    accelerator: str,
    region: str,
    vendor: str,
    instance_size: str,
    instance_type: str,
    limit=None,
):
    """Run a lighteval evaluation of `eval_request.model` on an inference endpoint.

    Results and details are pushed to the hub and the results dict is returned.
    `limit` caps the number of samples per task and should only be used for testing.
    """
    if limit:
        print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")

    args = DefaultNamespace(**{
        "endpoint_model_name": f"{eval_request.model}_{eval_request.precision}".lower(),
        "accelerator": accelerator,
        "vendor": vendor,
        "region": region,
        "instance_size": instance_size,
        "instance_type": instance_type,
        "max_samples": limit,
        "job_id": str(datetime.now()),
        "push_results_to_hub": True,
        "save_details": True,
        "push_details_to_hub": True,
        "public_run": False,
        "cache_dir": CACHE_PATH,
        "results_org": RESULTS_REPO,
        "output_dir": local_dir,
        "override_batch_size": batch_size,
        "custom_tasks": "custom_tasks.py",
        "tasks": task_names,
    })
    try:
        results = main(args)

        results["config"]["model_dtype"] = eval_request.precision
        results["config"]["model_name"] = eval_request.model
        results["config"]["model_sha"] = eval_request.revision

        dumped = json.dumps(results, indent=2)
        print(dumped)
    except Exception:
        # If the eval failed, force a cleanup of the endpoint, then re-raise so the
        # caller sees the original error instead of an UnboundLocalError on `results`.
        env_config = EnvConfig(token=TOKEN, cache_dir=args.cache_dir)

        model_config = create_model_config(args=args, accelerator=accelerator)
        model, _ = load_model(config=model_config, env_config=env_config)

        model.cleanup()
        raise

    return results
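

# Minimal usage sketch (commented out; for illustration only). The EvalRequest
# construction and the endpoint parameters below are assumptions, not values taken
# from this repository -- only the keyword names of run_evaluation are real.
#
#   request = EvalRequest(...)           # must expose .model, .precision, .revision
#   run_evaluation(
#       eval_request=request,
#       task_names="custom|mytask|0|0",  # lighteval-style task string (assumed format)
#       batch_size=1,
#       local_dir="./eval-results",
#       accelerator="gpu",
#       region="us-east-1",
#       vendor="aws",
#       instance_size="medium",
#       instance_type="g5.2xlarge",
#       limit=None,
#   )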