Upload README.md with huggingface_hub
README.md CHANGED
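A commit titled like this one is typically produced by pushing the edited model card with the huggingface_hub client. Below is a minimal sketch using `HfApi.upload_file`, which is the usual way such README commits are made; the `repo_id` is a placeholder assumption, since the target repository is not shown on this page.

```python
from huggingface_hub import HfApi

api = HfApi()

# Upload the corrected README.md to the model repo. This creates a commit
# like the one shown below. repo_id is an assumed placeholder.
api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id="user/Llama-2-7b-hf-AWQ",
    repo_type="model",
    commit_message="Upload README.md with huggingface_hub",
)
```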
```diff
@@ -8,20 +8,7 @@ tags:
 - llama-2
 - llama
 base_model: meta-llama/Llama-2-7b-hf
-model_name:
-  \ (embedding): Embedding(32000, 4096)\n (blocks): ModuleList(\n \
-  \ (0-31): 32 x LlamaLikeBlock(\n (norm_1): FasterTransformerRMSNorm()\n\
-  \ (attn): QuantAttentionFused(\n (qkv_proj): WQLinear_GEMM(in_features=4096,\
-  \ out_features=12288, bias=False, w_bit=4, group_size=128)\n (o_proj):\
-  \ WQLinear_GEMM(in_features=4096, out_features=4096, bias=False, w_bit=4, group_size=128)\n\
-  \ (rope): RoPE()\n )\n (norm_2): FasterTransformerRMSNorm()\n\
-  \ (mlp): LlamaMLP(\n (gate_proj): WQLinear_GEMM(in_features=4096,\
-  \ out_features=11008, bias=False, w_bit=4, group_size=128)\n (up_proj):\
-  \ WQLinear_GEMM(in_features=4096, out_features=11008, bias=False, w_bit=4, group_size=128)\n\
-  \ (down_proj): WQLinear_GEMM(in_features=11008, out_features=4096, bias=False,\
-  \ w_bit=4, group_size=128)\n (act_fn): SiLU()\n )\n )\n\
-  \ )\n (norm): LlamaRMSNorm()\n )\n (lm_head): Linear(in_features=4096,\
-  \ out_features=32000, bias=False)\n )\n)"
+model_name: Llama-2-7b-hf-AWQ
 library:
 - Transformers
 - AWQ
```
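The removed lines show a `print(model)`-style architecture dump that had leaked into the `model_name` field of the YAML front matter; the commit replaces it with the plain name `Llama-2-7b-hf-AWQ`. The dumped modules (`WQLinear_GEMM` with `w_bit=4, group_size=128`, `QuantAttentionFused`, `FasterTransformerRMSNorm`) match what AutoAWQ reports for a 4-bit GEMM quantization of Llama-2-7B with fused layers. As a hedged sketch, a checkpoint like the one this card describes could be loaded as follows; the `repo_id` is again an assumed placeholder.

```python
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

# repo_id is an assumed placeholder, not taken from this page.
repo_id = "user/Llama-2-7b-hf-AWQ"

# fuse_layers=True yields the fused modules seen in the removed dump
# (QuantAttentionFused, FasterTransformerRMSNorm).
model = AutoAWQForCausalLM.from_quantized(repo_id, fuse_layers=True)
tokenizer = AutoTokenizer.from_pretrained(repo_id)

# Simple generation check on GPU, where the fused model is placed.
tokens = tokenizer("Hello, my name is", return_tensors="pt").input_ids.cuda()
output = model.generate(tokens, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```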