twhoool02 committed on
Commit
35b5b0e
1 Parent(s): 2742689

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +1 -14
README.md CHANGED
@@ -8,20 +8,7 @@ tags:
8
  - llama-2
9
  - llama
10
  base_model: meta-llama/Llama-2-7b-hf
11
- model_name: "LlamaAWQForCausalLM(\n (model): LlamaForCausalLM(\n (model): LlamaLikeModel(\n\
12
- \ (embedding): Embedding(32000, 4096)\n (blocks): ModuleList(\n \
13
- \ (0-31): 32 x LlamaLikeBlock(\n (norm_1): FasterTransformerRMSNorm()\n\
14
- \ (attn): QuantAttentionFused(\n (qkv_proj): WQLinear_GEMM(in_features=4096,\
15
- \ out_features=12288, bias=False, w_bit=4, group_size=128)\n (o_proj):\
16
- \ WQLinear_GEMM(in_features=4096, out_features=4096, bias=False, w_bit=4, group_size=128)\n\
17
- \ (rope): RoPE()\n )\n (norm_2): FasterTransformerRMSNorm()\n\
18
- \ (mlp): LlamaMLP(\n (gate_proj): WQLinear_GEMM(in_features=4096,\
19
- \ out_features=11008, bias=False, w_bit=4, group_size=128)\n (up_proj):\
20
- \ WQLinear_GEMM(in_features=4096, out_features=11008, bias=False, w_bit=4, group_size=128)\n\
21
- \ (down_proj): WQLinear_GEMM(in_features=11008, out_features=4096, bias=False,\
22
- \ w_bit=4, group_size=128)\n (act_fn): SiLU()\n )\n )\n\
23
- \ )\n (norm): LlamaRMSNorm()\n )\n (lm_head): Linear(in_features=4096,\
24
- \ out_features=32000, bias=False)\n )\n)"
25
  library:
26
  - Transformers
27
  - AWQ
 
8
  - llama-2
9
  - llama
10
  base_model: meta-llama/Llama-2-7b-hf
11
+ model_name: Llama-2-7b-hf-AWQ
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  library:
13
  - Transformers
14
  - AWQ