import json
import logging
import os
import subprocess
import time

import pandas as pd
from huggingface_hub import snapshot_download

from src.envs import EVAL_RESULTS_PATH

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")


def time_diff_wrapper(func):
    """Log the wall-clock time taken by each call to the wrapped function."""

    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        diff = end_time - start_time
        logging.info("Time taken for %s: %s seconds", func.__name__, diff)
        return result

    return wrapper


@time_diff_wrapper
def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
    """Download dataset with exponential backoff retries."""
    attempt = 0
    while attempt < max_attempts:
        try:
            logging.info("Downloading %s to %s", repo_id, local_dir)
            snapshot_download(
                repo_id=repo_id,
                local_dir=local_dir,
                repo_type=repo_type,
                tqdm_class=None,
                token=os.environ.get("HF_TOKEN_PRIVATE"),
                etag_timeout=30,
                max_workers=8,
            )
            logging.info("Download successful")
            return
        except Exception as e:
            wait_time = backoff_factor**attempt
            logging.error("Error downloading %s: %s, retrying in %ss", repo_id, e, wait_time)
            time.sleep(wait_time)
            attempt += 1
    logging.error("Failed to download %s after %s attempts", repo_id, max_attempts)


def build_leadearboard_df():
    """Download the trusted model answers and pre-generated judgments, then build the leaderboard DataFrame."""
    # download answers of different models that we trust
    download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)
    # print(subprocess.Popen('ls src'))

    # copy the trusted model answers to data
    # (no shell is involved, so a glob like "internal/*" would not expand;
    #  a trailing slash on the source makes rsync copy the directory contents)
    subprocess.run(
        [
            "rsync",
            "-azP",
            "--ignore-existing",
            f"{EVAL_RESULTS_PATH}/internal/",
            "data/arena-hard-v0.1/model_answer/internal/",
        ],
        check=False,
    )

    # copy the pre-generated judgments
    # Will be rewritten after we switch to new gen for each submit
    subprocess.run(
        [
            "rsync",
            "-azP",
            "--ignore-existing",
            f"{EVAL_RESULTS_PATH}/model_judgment/",
            "data/arena-hard-v0.1/model_judgement/",
        ],
        check=False,
    )

    # Retrieve the leaderboard DataFrame
    with open("eval-results/evals/upd.json", "r", encoding="utf-8") as eval_file:
        leaderboard_df = pd.DataFrame.from_records(json.load(eval_file))
    return leaderboard_df.copy()
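

# Illustrative usage sketch (an assumption, not part of the original module):
# running this file directly would download the trusted results and log a quick
# summary of the resulting leaderboard DataFrame.
if __name__ == "__main__":
    leaderboard = build_leadearboard_df()
    logging.info("Leaderboard loaded: %d rows, columns=%s", len(leaderboard), list(leaderboard.columns))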