jondurbin committed
Commit 48c76df
Parent: bf0fdc7

Upload LlamaForCausalLM

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/workspace/toupload",
+  "_name_or_path": "/workspace/weeeeee.1",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -14,7 +14,7 @@
   "max_position_embeddings": 32768,
   "model_type": "llama",
   "num_attention_heads": 32,
-  "num_hidden_layers": 32,
+  "num_hidden_layers": 33,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
@@ -22,7 +22,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.36.2",
-  "use_cache": true,
+  "transformers_version": "4.37.2",
+  "use_cache": false,
   "vocab_size": 32000
 }
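The substantive change is num_hidden_layers going from 32 to 33 (one extra decoder layer), alongside use_cache flipping to false and a transformers version bump. A minimal sketch to confirm the uploaded config parses as expected; the repo id here is hypothetical, substitute the actual repository:

import torch
from transformers import AutoConfig

# Hypothetical repo id; substitute the actual model repository.
config = AutoConfig.from_pretrained("jondurbin/example-model")

# This commit bumps num_hidden_layers 32 -> 33 and disables the KV cache
# default (use_cache: false); both should round-trip through AutoConfig.
assert config.num_hidden_layers == 33
assert config.use_cache is False
assert config.torch_dtype == torch.bfloat16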
generation_config.json CHANGED
@@ -2,6 +2,6 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.36.2",
+  "transformers_version": "4.37.2",
   "use_cache": false
 }
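Only the recorded transformers version changes here; the generation defaults themselves (use_cache: false, bos/eos ids) are untouched. A quick check, again with a hypothetical repo id:

from transformers import GenerationConfig

gen_cfg = GenerationConfig.from_pretrained("jondurbin/example-model")
assert gen_cfg.use_cache is False  # unchanged by this commit
assert gen_cfg.bos_token_id == 1 and gen_cfg.eos_token_id == 2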
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2040625673494ce165d11e609379da33b6d7475c2f76eb16e5ffc5a3bf7ae3d4
+oid sha256:2e706e37a2f591daf1edfd5c19292ae308e03ccf1b6ff6762952d9cb043a98c3
 size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96f4eb4e925aca6629287184da8049fdec2cc3138aaf5d28206f7ab7c09dc94e
+oid sha256:5bfbd32f09b70eb83179885d37902ba9b927309127b3151584d47eef1b078a6d
 size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b48d2e9efd7770deec1b86fcff990a5b90365af562b64001882a0a75f253e2eb
-size 4540516344
+oid sha256:bfaa98d1465405a58ab8b473bfb33603981f0c5af990fb33004d9e82bf076f4f
+size 4976741400
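These three files are Git LFS pointers: each records only the sha256 digest and byte size of the real shard. Shards 1 and 2 keep their exact sizes but get new hashes (same-shape tensors, changed weight values), while shard 3 grows from 4540516344 to 4976741400 bytes (+436,225,056), which is where the new layer lands. A sketch for verifying a downloaded shard against its pointer:

import hashlib
import os

def verify_lfs_pointer(path: str, expected_sha256: str, expected_size: int) -> bool:
    # Compare a downloaded file against the oid/size recorded in its LFS pointer.
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_sha256

# Values taken from the new pointer for shard 3 of 3.
print(verify_lfs_pointer(
    "model-00003-of-00003.safetensors",
    "bfaa98d1465405a58ab8b473bfb33603981f0c5af990fb33004d9e82bf076f4f",
    4976741400,
))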
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 14483464192
+    "total_size": 14919688192
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -239,6 +239,15 @@
     "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",