File size: 2,763 Bytes
d317f64
 
 
 
 
 
 
 
 
 
 
 
d0e8be9
 
d317f64
 
 
 
 
 
 
d3db3e5
d317f64
d0e8be9
d317f64
 
d0e8be9
d317f64
 
 
 
 
 
d0e8be9
d317f64
 
 
 
 
a70555b
d317f64
 
 
 
 
 
d0e8be9
d3db3e5
d317f64
 
d3db3e5
d317f64
d0e8be9
d317f64
 
 
d3db3e5
d0e8be9
 
d3db3e5
80fb33a
d0e8be9
 
 
d3db3e5
d0e8be9
80fb33a
 
d0e8be9
 
 
80fb33a
 
d0e8be9
 
 
d3db3e5
d0e8be9
 
80fb33a
d0e8be9
 
 
 
 
d3db3e5
 
d317f64
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import functools
import json
import logging
import os
import subprocess
import time

import pandas as pd
from huggingface_hub import snapshot_download

from src.envs import EVAL_RESULTS_PATH

# Configure root logging once at import time; every logging.* call below inherits this.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")


def time_diff_wrapper(func):
    """Decorator that logs the wall-clock duration of each call to *func*.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all arguments/results unchanged and logs
        the elapsed time at INFO level after each call.
    """

    # functools.wraps preserves func.__name__/__doc__ on the wrapper so
    # introspection (and any decorators stacked above) see the real function.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        elapsed = time.time() - start_time
        logging.info("Time taken for %s: %s seconds", func.__name__, elapsed)
        return result

    return wrapper


@time_diff_wrapper
def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
    """Download a Hugging Face Hub snapshot with exponential-backoff retries.

    Args:
        repo_id: Hub repository id, e.g. "Vikhrmodels/openbench-eval".
        local_dir: Local directory the snapshot is written into.
        repo_type: Hub repository type forwarded to ``snapshot_download``.
        max_attempts: Number of download attempts before giving up.
        backoff_factor: Base of the exponential wait; attempt *i* waits
            ``backoff_factor ** i`` seconds before retrying.

    Note:
        Failure is logged, not raised — callers proceed best-effort, matching
        the original behavior of this helper.
    """
    for attempt in range(max_attempts):
        try:
            logging.info("Downloading %s to %s", repo_id, local_dir)
            snapshot_download(
                repo_id=repo_id,
                local_dir=local_dir,
                repo_type=repo_type,
                tqdm_class=None,
                token=os.environ.get("HF_TOKEN_PRIVATE"),
                etag_timeout=30,
                max_workers=8,
            )
            logging.info("Download successful")
            return
        except Exception:
            wait_time = backoff_factor**attempt
            # logging.exception records the full traceback, not just str(e).
            logging.exception("Error downloading %s, retrying in %ss", repo_id, wait_time)
            # Don't sleep after the final attempt — no retry is coming.
            if attempt < max_attempts - 1:
                time.sleep(wait_time)
    logging.error("Failed to download %s after %s attempts", repo_id, max_attempts)


def build_leadearboard_df():
    """Initialize the application space and return the leaderboard DataFrame.

    Downloads the trusted evaluation results, syncs pre-generated model
    answers and judgements into the local ``data/`` tree, then loads the
    aggregated leaderboard records from JSON.

    Returns:
        pandas.DataFrame: a copy of the leaderboard records loaded from
        ``eval-results/evals/upd.json``.
    """

    # Download answers of the models that we trust.
    download_dataset("Vikhrmodels/openbench-eval", EVAL_RESULTS_PATH)

    # Copy the trusted model answers to data/.
    # NOTE: subprocess.run with an argv list does no shell glob expansion, so a
    # literal "*" would be handed to rsync verbatim (and check=False would hide
    # the failure). A trailing "/" on the source makes rsync copy the
    # directory's contents instead.
    subprocess.run(
        [
            "rsync",
            "-azP",
            "--ignore-existing",
            f"{EVAL_RESULTS_PATH}/internal/",
            "data/arena-hard-v0.1/model_answer/internal/",
        ],
        check=False,
    )
    # Copy the pre-generated judgements.
    # Will be rewritten after we switch to new gen for each submit.
    subprocess.run(
        [
            "rsync",
            "-azP",
            "--ignore-existing",
            f"{EVAL_RESULTS_PATH}/model_judgment/",
            "data/arena-hard-v0.1/model_judgement/",
        ],
        check=False,
    )

    # Load the leaderboard records produced by the evaluation pipeline.
    with open("eval-results/evals/upd.json", "r", encoding="utf-8") as eval_file:
        leaderboard_df = pd.DataFrame.from_records(json.load(eval_file))
    return leaderboard_df.copy()