from transformers import AutoConfig

LLM_MODEL_ARCHS = {
    "stablelm_epoch": "🔴 StableLM-Epoch",
    "stablelm_alpha": "🔴 StableLM-Alpha",
    "mixformer-sequential": "🧑‍💻 Phi φ",
    "RefinedWebModel": "🦅 Falcon",
    "gpt_bigcode": "⭐ StarCoder",
    "RefinedWeb": "🦅 Falcon",
    "baichuan": "🌊 Baichuan 百川",
    "internlm": "🧑‍🎓 InternLM 书生",
    "mistral": "Ⓜ️ Mistral",
    "mixtral": "Ⓜ️ Mixtral",
    "codegen": "♾️ CodeGen",
    "chatglm": "💬 ChatGLM",
    "falcon": "🦅 Falcon",
    "bloom": "🌸 Bloom",
    "llama": "🦙 LLaMA",
    "rwkv": "🐦‍⬛ RWKV",
    "deci": "🔵 deci",
    "Yi": "🫂 Yi 人",
    "mpt": "🧱 MPT",

    "gpt_neox": "GPT-NeoX",
    "gpt_neo": "GPT-Neo",
    "gpt2": "GPT-2",
    "gptj": "GPT-J",
    "bart": "BART",
}


def model_hyperlink(link, model_name):
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
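
# Illustrative usage (the link target is an assumption, not leaderboard data):
#   model_hyperlink("https://huggingface.co/bigscience/bloom", "Bloom")
# returns an <a> tag that opens the model page in a new tab, styled with a dotted underline.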


def process_architectures(model):
    # Look up the model's architecture label from its Hub config; fall back to
    # "Unknown" if the config can't be fetched or its model_type isn't mapped.
    try:
        config = AutoConfig.from_pretrained(model, trust_remote_code=True)
        return LLM_MODEL_ARCHS.get(config.model_type, "Unknown")
    except Exception:
        return "Unknown"


def process_score(score, quantization):
    if quantization != "Unquantized":
        return f"{score:.2f}*"
    else:
        return f"{score:.2f} "


def process_quantizations(x):
    # x is a row-like mapping holding flattened "config.backend.*" entries.
    if (
        x["config.backend.quantization_scheme"] == "bnb"
        and x["config.backend.quantization_config.load_in_4bit"] is True
    ):
        return "BnB.4bit"
    elif (
        x["config.backend.quantization_scheme"] == "bnb"
        and x["config.backend.quantization_config.load_in_8bit"] is True
    ):
        return "BnB.8bit"
    elif x["config.backend.quantization_scheme"] == "gptq" and x["config.backend.quantization_config.bits"] == 4:
        return "GPTQ.4bit"
    elif x["config.backend.quantization_scheme"] == "awq" and x["config.backend.quantization_config.bits"] == 4:
        return "AWQ.4bit"
    else:
        return "Unquantized"


def process_kernels(x):
    # Same row-like input as process_quantizations; maps the quantization
    # scheme and version to the kernel implementation used.
    if x["config.backend.quantization_scheme"] == "gptq" and x["config.backend.quantization_config.version"] == 1:
        return "GPTQ.ExllamaV1"
    elif x["config.backend.quantization_scheme"] == "gptq" and x["config.backend.quantization_config.version"] == 2:
        return "GPTQ.ExllamaV2"
    elif (
        x["config.backend.quantization_scheme"] == "awq" and x["config.backend.quantization_config.version"] == "gemm"
    ):
        return "AWQ.GEMM"
    elif (
        x["config.backend.quantization_scheme"] == "awq" and x["config.backend.quantization_config.version"] == "gemv"
    ):
        return "AWQ.GEMV"
    else:
        return "No Kernel"


def test():
    # Ad-hoc debugging helper: fetch the config of a known model, then drop
    # into pdb to inspect it interactively.
    model = "Qwen/Qwen1.5-32B"
    config = AutoConfig.from_pretrained(model, trust_remote_code=True)

    import pdb

    pdb.set_trace()


if __name__ == "__main__":
    test()