"""Utility helpers for the LLM-Perf leaderboard Space: restarting the Space,
syncing the results dataset repo, and formatting leaderboard entries as HTML."""
import re

from huggingface_hub import HfApi, Repository


def restart_space(LLM_PERF_LEADERBOARD_REPO, OPTIMUM_TOKEN):
    """Restart the leaderboard Space on the Hub."""
    HfApi().restart_space(
        repo_id=LLM_PERF_LEADERBOARD_REPO, token=OPTIMUM_TOKEN
    )


def load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN):
    """Clone the LLM-Perf dataset repo locally and return its Repository handle.

    Returns None when no token is available.
    """
    llm_perf_repo = None
    if OPTIMUM_TOKEN:
        print("Loading LLM-Perf-Dataset from Hub...")
        llm_perf_repo = Repository(
            local_dir="./llm-perf-dataset",
            clone_from=LLM_PERF_DATASET_REPO,
            token=OPTIMUM_TOKEN,
            repo_type="dataset",
        )
        # Make sure the local clone is up to date with the Hub
        llm_perf_repo.git_pull()

    return llm_perf_repo


# Special-cased model ids and the pages they should link to (see make_clickable_model)
LLAMAS = ["huggingface/llama-7b", "huggingface/llama-13b",
          "huggingface/llama-30b", "huggingface/llama-65b"]
KOALA_LINK = "https://huggingface.co/TheBloke/koala-13B-HF"
OASST_LINK = "https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
DOLLY_LINK = "https://huggingface.co/databricks/dolly-v2-12b"
MODEL_PAGE = "https://huggingface.co/models"
LLAMA_LINK = "https://ai.facebook.com/blog/large-language-model-llama-meta-ai/"
VICUNA_LINK = "https://huggingface.co/CarperAI/stable-vicuna-13b-delta"
ALPACA_LINK = "https://crfm.stanford.edu/2023/03/13/alpaca.html"


def model_hyperlink(link, model_name):
    """Wrap a model name in an HTML anchor pointing to `link`."""
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'


def make_clickable_model(model_name):
    """Turn a model identifier into an HTML link to its canonical page."""
    link = f"https://huggingface.co/{model_name}"

    # Fully-qualified model ids that should point to external or canonical pages
    if model_name in LLAMAS:
        link = LLAMA_LINK
        model_name = model_name.split("/")[1]
    elif model_name == "HuggingFaceH4/stable-vicuna-13b-2904":
        link = VICUNA_LINK
        model_name = "stable-vicuna-13b"
    elif model_name == "HuggingFaceH4/llama-7b-ift-alpaca":
        link = ALPACA_LINK
        model_name = "alpaca-13b"

    # Already-shortened display names that also need a custom link
    if model_name == "dolly-12b":
        link = DOLLY_LINK
    elif model_name == "vicuna-13b":
        link = VICUNA_LINK
    elif model_name == "koala-13b":
        link = KOALA_LINK
    elif model_name == "oasst-12b":
        link = OASST_LINK

    return model_hyperlink(link, model_name)


def make_clickable_score(score):
    """Wrap an H4 score in an HTML link to the Open LLM Leaderboard Space."""
    link = "https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard"
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{score}</a>'


def extract_score_from_clickable(clickable_score) -> float:
    """Recover the numeric score from the HTML produced by make_clickable_score."""
    return float(re.findall(r"\d+\.\d+", clickable_score)[-1])


def submit_query(text, backends, datatypes, threshold, raw_df):
    """Filter the leaderboard dataframe by model name, backend, datatype and H4 score."""
    # Work on a copy so the cached raw dataframe is not mutated between queries
    scored_df = raw_df.copy()
    scored_df["H4 Score ⬆️"] = scored_df["H4 Score ⬆️"].apply(
        extract_score_from_clickable)

    filtered_df = scored_df[
        scored_df["Model 🤗"].str.lower().str.contains(text.lower()) &
        scored_df["Backend 🏭"].isin(backends) &
        scored_df["Datatype 📥"].isin(datatypes) &
        (scored_df["H4 Score ⬆️"] >= threshold)
    ].copy()  # copy to avoid SettingWithCopyWarning on the assignment below

    filtered_df["H4 Score ⬆️"] = filtered_df["H4 Score ⬆️"].apply(
        make_clickable_score)
    return filtered_df
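

if __name__ == "__main__":
    # Illustrative smoke test only: the real dataframe is built elsewhere from the
    # llm-perf dataset, so the row below is made up and simply mirrors the column
    # names that submit_query expects.
    import pandas as pd

    demo_df = pd.DataFrame(
        {
            "Model 🤗": [make_clickable_model("databricks/dolly-v2-12b")],
            "Backend 🏭": ["pytorch"],
            "Datatype 📥": ["float16"],
            "H4 Score ⬆️": [make_clickable_score(43.21)],
        }
    )
    # Keep rows matching "dolly" on the pytorch backend in float16 with score >= 40
    print(submit_query("dolly", ["pytorch"], ["float16"], 40.0, demo_df))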