from transformers import AutoConfig

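# Maps a model's `config.model_type` (as reported by AutoConfig) to the
# architecture label displayed on the leaderboard.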
LLM_MODEL_ARCHS = {
    "stablelm_epoch": "πŸ”΄ StableLM-Epoch",
    "stablelm_alpha": "πŸ”΄ StableLM-Alpha",
    "mixformer-sequential": "πŸ§‘β€πŸ’» Phi Ο†",
    "RefinedWebModel": "πŸ¦… Falcon",
    "gpt_bigcode": "⭐ StarCoder",
    "RefinedWeb": "πŸ¦… Falcon",
    "baichuan": "🌊 Baichuan 百川",  # river
    "internlm": "πŸ§‘β€πŸŽ“ InternLM δΉ¦η”Ÿ",  # scholar
    "mistral": "Ⓜ️ Mistral",
    "mixtral": "Ⓜ️ Mixtral",
    "codegen": "♾️ CodeGen",
    "chatglm": "πŸ’¬ ChatGLM",
    "falcon": "πŸ¦… Falcon",
    "bloom": "🌸 Bloom",
    "llama": "πŸ¦™ LLaMA",
    "rwkv": "πŸ¦β€β¬› RWKV",
    "deci": "πŸ”΅ deci",
    "Yi": "πŸ«‚ Yi δΊΊ",  # people
    "mpt": "🧱 MPT",
    # no emoji assigned yet; suggestions welcome
    "gpt_neox": "GPT-NeoX",
    "gpt_neo": "GPT-Neo",
    "gpt2": "GPT-2",
    "gptj": "GPT-J",
    "bart": "BART",
}


def model_hyperlink(link, model_name):
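    # Renders a model name as an HTML link (dotted underline, opens in a new
    # tab) for display in the leaderboard table.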
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline; text-decoration-style: dotted;">{model_name}</a>'


def process_architectures(model):
    # Resolve the architecture label from the model's Hub config, falling
    # back to "Unknown" if the config can't be fetched or the type is unmapped.
    try:
        config = AutoConfig.from_pretrained(model, trust_remote_code=True)
        return LLM_MODEL_ARCHS.get(config.model_type, "Unknown")
    except Exception:
        return "Unknown"


def process_score(score, quantization):
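    # Quantized scores get a trailing asterisk to set them apart from
    # unquantized runs (the trailing space presumably keeps widths aligned).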
    if quantization != "Unquantized":
        return f"{score:.2f}*"
    else:
        return f"{score:.2f} "


def process_quantizations(x):
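    # x is a row of the flattened benchmark config; map the quantization
    # scheme and its bit-width/load flags to a short display label.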
    if (
        x["config.backend.quantization_scheme"] == "bnb"
        and x["config.backend.quantization_config.load_in_4bit"] is True
    ):
        return "BnB.4bit"
    elif (
        x["config.backend.quantization_scheme"] == "bnb"
        and x["config.backend.quantization_config.load_in_8bit"] is True
    ):
        return "BnB.8bit"
    elif x["config.backend.quantization_scheme"] == "gptq" and x["config.backend.quantization_config.bits"] == 4:
        return "GPTQ.4bit"
    elif x["config.backend.quantization_scheme"] == "awq" and x["config.backend.quantization_config.bits"] == 4:
        return "AWQ.4bit"
    else:
        return "Unquantized"


def process_kernels(x):
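    # Map the quantization scheme and kernel version fields to the inference
    # kernel label (ExLlama v1/v2 for GPTQ, GEMM/GEMV for AWQ).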
    if x["config.backend.quantization_scheme"] == "gptq" and x["config.backend.quantization_config.version"] == 1:
        return "GPTQ.ExllamaV1"

    elif x["config.backend.quantization_scheme"] == "gptq" and x["config.backend.quantization_config.version"] == 2:
        return "GPTQ.ExllamaV2"
    elif (
        x["config.backend.quantization_scheme"] == "awq" and x["config.backend.quantization_config.version"] == "gemm"
    ):
        return "AWQ.GEMM"
    elif (
        x["config.backend.quantization_scheme"] == "awq" and x["config.backend.quantization_config.version"] == "gemv"
    ):
        return "AWQ.GEMV"
    else:
        return "No Kernel"


def test():
    # Smoke test: make sure a known model resolves to a mapped architecture.
    model = "Qwen/Qwen1.5-32B"
    config = AutoConfig.from_pretrained(model, trust_remote_code=True)
    print(f"{model} ({config.model_type}): {LLM_MODEL_ARCHS.get(config.model_type, 'Unknown')}")


if __name__ == "__main__":
    test()