File size: 3,825 Bytes
9f0d781
 
 
 
 
 
4a6c7b9
 
 
 
9f0d781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a6c7b9
 
 
 
 
 
 
 
9f0d781
 
 
 
 
 
 
 
 
4a6c7b9
 
 
9f0d781
 
 
4a6c7b9
9f0d781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os 
from constants import EVAL_REQUESTS_PATH
from pathlib import Path
from huggingface_hub import HfApi, Repository

# Hub access token; stays None when the TOKEN_HUB variable is not set.
TOKEN_HUB = os.getenv("TOKEN_HUB")

# Dataset repo ids, each overridable through an environment variable of the
# same name.
QUEUE_REPO = os.getenv("QUEUE_REPO", "hf-audio/leaderboard-evals")
QUEUE_REPO_WHISPER = os.getenv("QUEUE_REPO_WHISPER", "Steveeeeeeen/whisper-leaderboard-evals")

# Local directories used as clone targets for the two repos above.
QUEUE_PATH = os.getenv("QUEUE_PATH", "results")
QUEUE_PATH_WHISPER = os.getenv("QUEUE_PATH_WHISPER", "whisper-results")

# Shared Hub client used by the helpers in this module.
hf_api = HfApi(endpoint="https://huggingface.co", token=TOKEN_HUB)

def load_all_info_from_dataset_hub():
    """Pull both evaluation dataset repos and locate their request/result files.

    Clones (``clone_from``) ``QUEUE_REPO`` into ``QUEUE_PATH`` and
    ``QUEUE_REPO_WHISPER`` into ``QUEUE_PATH_WHISPER``, runs ``git_pull()`` on
    each, then collects the pending request names and the "latest" CSV of
    each local directory.

    Returns:
        A 5-tuple ``(eval_queue_repo, requested_models, csv_results,
        whisper_eval_queue_repo, whisper_csv_results)`` where
        ``requested_models`` is the list of ``*.txt`` file stems found under
        ``QUEUE_PATH / EVAL_REQUESTS_PATH`` and the two ``*_csv_results``
        values are the paths returned by ``get_csv_with_results``.

    Raises:
        ValueError: If ``TOKEN_HUB`` is not set, or if either local directory
            does not contain exactly one CSV whose stem ends with "latest".
    """
    # Fail fast: nothing below can work without a token.
    if TOKEN_HUB is None:
        raise ValueError("No Hugging Face token provided. Skipping evaluation requests and results.")

    print("Pulling evaluation requests and results.")

    eval_queue_repo = Repository(
        local_dir=QUEUE_PATH,
        clone_from=QUEUE_REPO,
        use_auth_token=TOKEN_HUB,
        repo_type="dataset",
    )
    eval_queue_repo.git_pull()

    whisper_eval_queue_repo = Repository(
        local_dir=QUEUE_PATH_WHISPER,
        clone_from=QUEUE_REPO_WHISPER,
        use_auth_token=TOKEN_HUB,
        repo_type="dataset",
    )
    whisper_eval_queue_repo.git_pull()

    # Local directory where the dataset repo is cloned + folder with eval
    # requests. QUEUE_PATH is a plain string (it comes from the environment),
    # so wrap it in Path before joining; this works whether
    # EVAL_REQUESTS_PATH is a str or a Path.
    directory = Path(QUEUE_PATH) / EVAL_REQUESTS_PATH
    requested_models = [p.stem for p in get_all_requested_models(directory)]

    # Each cloned directory must hold exactly one "*latest.csv" results file.
    csv_results = get_csv_with_results(QUEUE_PATH)
    whisper_csv_results = get_csv_with_results(QUEUE_PATH_WHISPER)
    lookups = ((QUEUE_PATH, csv_results), (QUEUE_PATH_WHISPER, whisper_csv_results))
    missing = [str(location) for location, found in lookups if found is None]
    if missing:
        # Report the real cause instead of blaming a missing token.
        raise ValueError(
            "Expected exactly one CSV file whose name ends with 'latest' in: "
            + ", ".join(missing)
        )

    return eval_queue_repo, requested_models, csv_results, whisper_eval_queue_repo, whisper_csv_results


def upload_file(requested_model_name, path_or_fileobj):
    """Upload an evaluation-request file to the queue dataset repo.

    The file is stored in ``QUEUE_REPO`` under ``EVAL_REQUESTS_PATH`` using
    the ``name`` attribute of ``path_or_fileobj`` as the file name.

    Args:
        requested_model_name: Model name, used only in the commit message.
        path_or_fileobj: Object to upload; it must expose a ``name``
            attribute (e.g. a ``pathlib.Path``) and is passed unchanged to
            ``hf_api.upload_file``.
    """
    # Use forward slashes for the in-repo path regardless of the host OS;
    # str() on a Path would produce backslashes on Windows.
    dest_repo_file = (Path(EVAL_REQUESTS_PATH) / path_or_fileobj.name).as_posix()
    hf_api.upload_file(
        path_or_fileobj=path_or_fileobj,
        path_in_repo=dest_repo_file,
        repo_id=QUEUE_REPO,
        token=TOKEN_HUB,
        repo_type="dataset",
        commit_message=f"Add {requested_model_name} to eval queue",
    )

def get_all_requested_models(directory):
    """Return the ``*.txt`` files found directly inside ``directory``.

    ``directory`` may be a string or a ``Path``; the result is a list of
    ``Path`` objects (non-recursive search).
    """
    return list(Path(directory).glob("*.txt"))

def get_csv_with_results(directory):
    """Return the single ``*.csv`` in ``directory`` whose stem ends with "latest".

    Returns ``None`` when no such file exists or when more than one does.
    """
    chosen = None
    for csv_path in Path(directory).glob("*.csv"):
        if not csv_path.stem.endswith("latest"):
            continue
        if chosen is not None:
            # A second candidate makes the choice ambiguous.
            return None
        chosen = csv_path
    return chosen



def is_model_on_hub(model_name, revision="main") -> "tuple[bool, str | None]":
    """Check that ``model_name`` is a well-formed id that exists on the Hub.

    Args:
        model_name: Model id in the form ``author/model_name``. Spaces are
            removed before validation.
        revision: Accepted for interface compatibility; not used by the
            lookup.

    Returns:
        ``(True, None)`` when exactly one listed model has this id, otherwise
        ``(False, message)`` where ``message`` is a sentence fragment
        describing the problem.
    """
    invalid_name = (False, "is not a valid model name. Please use the format `author/model_name`.")

    # Validate explicitly rather than relying on a broad ``except`` to catch
    # IndexError/AttributeError from malformed input.
    if not isinstance(model_name, str):
        return invalid_name
    model_name = model_name.replace(" ", "")
    author, slash, model_id = model_name.partition("/")
    # Exactly one "/" with a non-empty part on each side; an id with extra
    # slashes can never equal a listed model id, so reject it without
    # querying the Hub.
    if not slash or not author or not model_id or "/" in model_id:
        return invalid_name

    try:
        candidates = hf_api.list_models(author=author, search=model_id)
        # The search may return several models; count exact id matches only.
        exact_matches = sum(1 for m in candidates if m.modelId == model_name)
    except Exception as e:
        # Best-effort boundary: report any lookup failure as "not found".
        print(f"Could not get the model from the hub.: {e}")
        return False, "was not found on hub!"

    if exact_matches != 1:
        return False, "was not found on the hub!"
    return True, None