leaderboard/src/backend/run_eval_suite.py
import json
import os
import logging
from datetime import datetime

import src.envs as envs
from src.backend.manage_requests import EvalRequest
from src.backend.evaluate_model import Evaluator

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logging.getLogger("openai").setLevel(logging.WARNING)


def run_evaluation(eval_request: EvalRequest, batch_size, device,
                   local_dir: str, results_repo: str, no_cache=True, limit=None,
                   need_check=True, write_results=False):
"""
Run the evaluation for a given model and upload the results.
Args:
eval_request (EvalRequest): The evaluation request object containing model details.
num_fewshot (int): Number of few-shot examples.
batch_size (int): Batch size for processing.
device (str): The device to run the evaluation on.
local_dir (str): Local directory path for saving results.
results_repo (str): Repository ID where results will be uploaded.
no_cache (bool): Whether to disable caching.
limit (int, optional): Limit on the number of items to process. Use with caution.
Returns:
dict: A dictionary containing evaluation results.
"""
    if limit:
        logging.warning("--limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
    output_folder = os.path.join(local_dir, *eval_request.model.split("/"))

    # if os.path.exists(output_folder):
    #     f_name = os.listdir(output_folder)[-1]
    #     print(f"Loading results from {os.path.join(output_folder, f_name)}")
    #     results = json.loads(os.path.join(output_folder, f_name))
    #     dumped = json.dumps(results, indent=2)
    #     logging.info(dumped)
    # else:
    try:
        evaluator = Evaluator(eval_request.model, eval_request.revision, eval_request.precision,
                              batch_size, device, no_cache, limit, write_out=True,
                              output_base_path='logs')
        results = evaluator.evaluate()
        evaluator.write_results()
    except Exception as e:
        logging.error(f"Error during evaluation: {e}")
        raise
    dumped = json.dumps(results, indent=2)
    logging.info(dumped)

    # Use a single timestamp so the local file name and the path in the repo match.
    timestamp = datetime.now()
    output_path = os.path.join(output_folder, f"results_{timestamp}.json")
    os.makedirs(output_folder, exist_ok=True)
    with open(output_path, "w") as f:
        f.write(dumped)
    print(f"Results have been saved to {output_path}")

    if not need_check:
        print("Path in the repo:", f"{eval_request.model}/results_{timestamp}.json")
        envs.API.upload_file(
            path_or_fileobj=output_path,
            path_in_repo=f"{eval_request.model}/results_{timestamp}.json",
            repo_id=results_repo,
            repo_type="dataset",
        )
    return results
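

# ---------------------------------------------------------------------------
# Example usage: a minimal sketch for a local smoke test, not part of the
# evaluation pipeline. It assumes EvalRequest can be constructed directly with
# the model/revision/precision fields used above; the field names, the model
# id, and the results repo id below are illustrative assumptions.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Hypothetical request object; adjust to EvalRequest's actual schema.
    request = EvalRequest(model="org/model-name", revision="main", precision="float16")
    run_evaluation(
        eval_request=request,
        batch_size=1,
        device="cuda",
        local_dir="eval-results",             # local folder for result JSON files
        results_repo="org/results-dataset",   # assumed dataset repo id for uploads
        limit=10,                             # small limit: smoke test only
        need_check=True,                      # keep results local; skip the upload
    )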