File size: 5,301 Bytes
d489aeb
 
 
 
 
 
 
 
632dd72
d489aeb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a88d51c
d489aeb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env python

import json
import os

from datetime import datetime, timezone

from src.envs import API, EVAL_REQUESTS_PATH, H4_TOKEN, QUEUE_REPO
from src.submission.check_validity import already_submitted_models, get_model_size, is_model_on_hub


def add_new_eval(model: str, base_model: str, revision: str, precision: str, private: bool, weight_type: str, model_type: str):
    """Validate a model submission and add it to the evaluation queue.

    Checks the submission for completeness and duplicates, verifies the model
    (and, for Delta/Adapter weights, its base model) exists on the Hugging Face
    Hub, then writes a PENDING eval-request JSON file locally and uploads it to
    ``QUEUE_REPO``.

    Args:
        model: Hub repo id of the model to evaluate ("org/name" or bare name).
        base_model: Base model repo id; only checked for Delta/Adapter weights.
        revision: Git revision to evaluate; an empty string falls back to "main".
        precision: Precision label; only the first whitespace-separated token is kept.
        private: Whether the submission is private (recorded in the request).
        weight_type: One of "Original", "Delta", "Adapter".
        model_type: Required, non-empty model type label (e.g. "pretrained").

    Returns:
        None. Progress and validation errors are reported via print().
    """
    REQUESTED_MODELS, _ = already_submitted_models(EVAL_REQUESTS_PATH)

    user_name = ""
    model_path = model
    if "/" in model:
        # Split on the first "/" only, so a path-like id never loses segments.
        user_name, model_path = model.split("/", 1)

    precision = precision.split(" ")[0]
    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    if not model_type:
        print("Please select a model type.")
        return

    # Default to the main branch when no revision is given.
    if revision == "":
        revision = "main"

    # Reject duplicates up front, before any (slow) Hub round-trips.
    if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
        print("This model has been already submitted.")
        return

    # Delta/Adapter weights require an existing base model on the Hub.
    if weight_type in ["Delta", "Adapter"]:
        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=H4_TOKEN, test_tokenizer=True)
        if not base_model_on_hub:
            print(f'Base model "{base_model}" {error}')
            return

    # Adapters are applied on top of the base model, so only non-Adapter
    # submissions need the model repo itself to be loadable.
    if not weight_type == "Adapter":
        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, test_tokenizer=True)
        if not model_on_hub:
            print(f'Model "{model}" {error}')
            return

    # Is the model info correctly filled?
    try:
        model_info = API.model_info(repo_id=model, revision=revision)
    except Exception:
        print("Could not get your model information. Please fill it up properly.")
        return

    model_size = get_model_size(model_info=model_info, precision=precision)

    # Best-effort license lookup: a missing license only warns, it does not
    # block the submission. (`license` renamed to avoid shadowing the builtin.)
    model_license = 'none'
    try:
        model_license = model_info.cardData["license"]
    except Exception:
        print("Please select a license for your model")

    # Seems good, creating the eval
    print("Adding new eval")

    eval_entry = {
        "model": model,
        "base_model": base_model,
        "revision": revision,
        "private": private,
        "precision": precision,
        "weight_type": weight_type,
        "status": "PENDING",
        "submitted_time": current_time,
        "model_type": model_type,
        "likes": model_info.likes,
        "params": model_size,
        "license": model_license,
    }

    print("Creating eval file")
    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
    os.makedirs(OUT_DIR, exist_ok=True)
    out_path = f"{OUT_DIR}/{model_path}_eval_request_{private}_{precision}_{weight_type}.json"

    with open(out_path, "w") as f:
        f.write(json.dumps(eval_entry))

    print("Uploading eval file")
    # path_in_repo is derived from the local path: everything after "eval-queue/"
    # (assumes EVAL_REQUESTS_PATH contains that marker — TODO confirm).
    API.upload_file(path_or_fileobj=out_path, path_in_repo=out_path.split("eval-queue/")[1],
                    repo_id=QUEUE_REPO, repo_type="dataset", commit_message=f"Add {model} to eval queue")

    # Remove the local file
    os.remove(out_path)

    print("Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list.")
    return


def main():
    """Scan the Hub for popular public English text-generation models and
    submit the top 200 (by downloads) to the evaluation queue, skipping any
    model that already has an eval request of any status."""
    # Imports hoisted out of the per-model loop — the original re-imported
    # these on every one of up to 200 iterations.
    from huggingface_hub import HfApi, snapshot_download

    from src.backend.envs import EVAL_REQUESTS_PATH_BACKEND
    from src.backend.manage_requests import EvalRequest, get_eval_requests

    api = HfApi()
    model_lst = list(api.list_models())

    def custom_filter(m) -> bool:
        # Public, English-tagged, text-generation models only.
        return m.pipeline_tag in {'text-generation'} and 'en' in m.tags and m.private is False

    filtered_model_lst = sorted([m for m in model_lst if custom_filter(m)], key=lambda m: m.downloads, reverse=True)

    # All request statuses — a model in any of these states is not re-queued.
    status = ["PENDING", "RUNNING", "FINISHED", "FAILED"]

    for model in filtered_model_lst[:200]:
        print(f'Considering {model.id} ..')

        # Deliberately re-synced inside the loop: add_new_eval() pushes new
        # requests to QUEUE_REPO, so each iteration sees fresh queue state.
        snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60)

        eval_requests: list[EvalRequest] = get_eval_requests(job_status=status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)

        requested_model_names = {e.model for e in eval_requests}

        if model.id not in requested_model_names:
            add_new_eval(model=model.id, base_model='', revision='main', precision='float32', private=False, weight_type='Original', model_type='pretrained')
        else:
            print(f'Model {model.id} already added, not adding it to the queue again.')


# Script entry point: only run the queue-population pass when executed
# directly, not when imported as a module.
if __name__ == "__main__":
    main()