jondurbin committed
Commit 0d253b4
1 Parent(s): 686771b

Upload LlamaForCausalLM

config.json CHANGED
@@ -14,7 +14,7 @@
   "max_position_embeddings": 32768,
   "model_type": "llama",
   "num_attention_heads": 32,
-  "num_hidden_layers": 33,
+  "num_hidden_layers": 32,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
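The only config change is num_hidden_layers dropping from 33 to 32, which brings the config in line with the checkpoint's actual layer count (the weight map below only goes up to model.layers.31). A minimal sketch of that consistency check, assuming config.json and model.safetensors.index.json sit in the current directory:

```python
import json
import re

# Hypothetical local paths; adjust to wherever the repo is checked out.
with open("config.json") as f:
    config = json.load(f)
with open("model.safetensors.index.json") as f:
    index = json.load(f)

# Highest layer index referenced by the weight map,
# e.g. "model.layers.31.mlp.up_proj.weight" -> 31.
layer_ids = {
    int(m.group(1))
    for key in index["weight_map"]
    if (m := re.match(r"model\.layers\.(\d+)\.", key))
}

assert max(layer_ids) + 1 == config["num_hidden_layers"], (
    f"config says {config['num_hidden_layers']} layers, "
    f"weight map only covers indices up to {max(layer_ids)}"
)
print("num_hidden_layers matches the sharded checkpoint")
```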
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e706e37a2f591daf1edfd5c19292ae308e03ccf1b6ff6762952d9cb043a98c3
+oid sha256:b479a9aafb0fa6e3062c7d864a07628c99faf8e3e37cb9c0800330fa05a8ce3e
 size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5bfbd32f09b70eb83179885d37902ba9b927309127b3151584d47eef1b078a6d
+oid sha256:79e06f9a6d24fecb328632236fec322a849bfb6419d40eb5405fdb327542d096
 size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bfaa98d1465405a58ab8b473bfb33603981f0c5af990fb33004d9e82bf076f4f
-size 4976741400
+oid sha256:b5d8bd27dcb1dbaa61bdbfe5ad7f77fdd5427d875b76dee5caeafc679fad8753
+size 4540516344
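Each .safetensors entry above is a Git LFS pointer, so the diff only touches the sha256 oid (and, for shard 3, the byte size). A quick sketch for confirming a downloaded shard matches its pointer, assuming the actual weights have been pulled locally; the expected values are copied from the new pointer for shard 1:

```python
import hashlib

# Expected values come straight from the LFS pointer in this commit (shard 1).
expected_oid = "b479a9aafb0fa6e3062c7d864a07628c99faf8e3e37cb9c0800330fa05a8ce3e"
expected_size = 4943162336
path = "model-00001-of-00003.safetensors"

sha = hashlib.sha256()
size = 0
with open(path, "rb") as f:
    # Stream in 1 MiB chunks so the ~5 GB shard never has to fit in memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
        size += len(chunk)

assert size == expected_size, f"size mismatch: {size} != {expected_size}"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"
print(f"{path} matches its LFS pointer")
```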
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 14919688192
+    "total_size": 14483464192
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -239,15 +239,6 @@
     "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.32.input_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.32.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.32.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
-    "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",