jondurbin committed
Commit 594bf24
Parent: 36c633b

Upload LlamaForCausalLM

config.json CHANGED
@@ -14,15 +14,16 @@
  "max_position_embeddings": 32768,
  "model_type": "llama",
  "num_attention_heads": 32,
- "num_hidden_layers": 33,
+ "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
+ "sliding_window": 4096,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.37.2",
- "use_cache": false,
+ "use_cache": true,
  "vocab_size": 32000
  }
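In short, this config change drops num_hidden_layers from 33 to 32, re-enables the KV cache, and adds a sliding_window entry. A minimal sketch for inspecting the updated fields after cloning the repo, assuming a hypothetical local checkout at ./checkout (the repo id itself is not shown in this commit):

from transformers import AutoConfig

# "./checkout" is a hypothetical local path to this repository.
config = AutoConfig.from_pretrained("./checkout")

# Fields touched by this commit.
print(config.num_hidden_layers)                  # 32 after this commit (was 33)
print(config.use_cache)                          # True after this commit (was False)
print(getattr(config, "sliding_window", None))   # 4096, newly added

Note that sliding_window is not a standard LlamaConfig field; unknown keys in config.json are generally kept as extra attributes on the loaded config object rather than interpreted by the Llama attention code.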
generation_config.json CHANGED
@@ -2,6 +2,5 @@
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
- "transformers_version": "4.37.2",
- "use_cache": false
+ "transformers_version": "4.37.2"
  }
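Dropping use_cache from generation_config.json means generation no longer pins it to false; it falls back to the default (and to the now-true value in config.json). A small sketch, again assuming a hypothetical local checkout:

from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("./checkout")
print(gen_config.bos_token_id, gen_config.eos_token_id)  # 1 2
print(gen_config.use_cache)  # True: no longer overridden to False here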
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fd112b457029a18fa6a7830ecd99cd3e0b4f8637f698761ca2d1dadb08ec11d1
+ oid sha256:4e2da0ebde768bac242f7cbf832e1a248519d1c1d26a1d7355465b1648c39757
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b8d2b624b3a983c5055e015181aa58ef01242bffaa1eed36b2bfab4b5399130e
+ oid sha256:a591cb9adbcfac23f8a7e3832ed7afdce769e6f45607f7c69232b92aa70e8ab9
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:60238df99dc2ba1f44c32ec2e57c265596ef41e23bf19f60f67c9e05cee81330
- size 4976741400
+ oid sha256:f9b905956328ae22115e71e19f8d84e182d46a20f280321d19ee56d4ed540831
+ size 4540516344
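The three .safetensors entries above are Git LFS pointer files, so only the recorded oid (the SHA-256 of the shard contents) and size change when a shard is replaced. A small sketch, assuming the shards have already been downloaded next to the script, for checking a local shard against the oid in its pointer:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream in 1 MiB chunks so multi-GB shards never need to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected digest for the updated third shard, copied from the pointer above.
expected = "f9b905956328ae22115e71e19f8d84e182d46a20f280321d19ee56d4ed540831"
print(sha256_of("model-00003-of-00003.safetensors") == expected)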
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
  {
  "metadata": {
- "total_size": 14919688192
+ "total_size": 14483464192
  },
  "weight_map": {
  "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -239,15 +239,6 @@
  "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
  "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
  "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
- "model.layers.32.input_layernorm.weight": "model-00003-of-00003.safetensors",
- "model.layers.32.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
- "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
- "model.layers.32.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
- "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
- "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
- "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
- "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
- "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
  "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
  "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
  "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",