Text Generation
Transformers
PyTorch
Safetensors
English
llama
finance
text-generation-inference
Inference Endpoints
AdaptLLM committed on
Commit
4e7dc56
1 Parent(s): 05e98aa

AdaptLLM-fin-v0

Browse files
config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LLaMAForCausalLM"
4
+ ],
5
+ "bos_token_id": 0,
6
+ "eos_token_id": 1,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 4096,
9
+ "intermediate_size": 11008,
10
+ "initializer_range": 0.02,
11
+ "max_sequence_length": 2048,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 32,
14
+ "num_hidden_layers": 32,
15
+ "pad_token_id": 32000,
16
+ "rms_norm_eps": 1e-06,
17
+ "torch_dtype": "float16",
18
+ "transformers_version": "4.27.0.dev0",
19
+ "use_cache": true,
20
+ "vocab_size": 32001
21
+ }
generation_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_from_model_config": true, "bos_token_id": 0, "eos_token_id": 1, "pad_token_id": 0, "transformers_version": "4.27.0.dev0"}
pytorch_model-00001-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f497287f6d431d1c72c5a5e3d1f3d1b82182a8b8d561e4f43463af5ed8802aa
3
+ size 809520963
pytorch_model-00002-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53aabd332380d30623bae7bc9089d35d252cce61ce55b6f98ca833045959058f
3
+ size 809520963
pytorch_model-00003-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e5602a29de104aff5b4c7adf77af212ee22c00abaa89fc1e46b62182f374f66
3
+ size 809520963
pytorch_model-00004-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7eb657bffb1b1f8a598143dbbe9e805e787f5bf95e1bf2b6b136346c880d4d
3
+ size 809520963
pytorch_model-00005-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13a06dfbb61c8b157f81cfb5dc82236342969d1b44de53133a258e6f7e2ada78
3
+ size 809520963
pytorch_model-00006-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2c7c2d0a137fa9f8b73d370af959ae1ff02683b142ec97f74dacba0eeb4bf46
3
+ size 809520963
pytorch_model-00007-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93630213b38a35fd418a248e7a3ff93e09b7d64eaa455b6796d80b130e1f3e0f
3
+ size 809520963
pytorch_model-00008-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6fb4b9a03a5fd266571ab04de70a990931fcb2aedfd6e22bf8012b529774a07
3
+ size 809520963
pytorch_model-00009-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5468c3de55161346f83b1d8a4e63764520f167a036d99f99acd998d3a4aa1a0d
3
+ size 809520963
pytorch_model-00010-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76b094949c8d581b7f7cd2c6cd430a479ca3a2b6fb07ca1f9348cd9e45eaec5b
3
+ size 809520963
pytorch_model-00011-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:310f296ad260bd01ce5124430a7edd73002a313729d15f95cfd2bf64290b003e
3
+ size 809520963
pytorch_model-00012-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a4792ed0c72509c15d333022b83be14589f0cb8b41f2a6e3730a3ac5246ce7b
3
+ size 809520963
pytorch_model-00013-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90821af788fea488ab2af37f4a7608f4a4daf12615169919955353d612ae9721
3
+ size 809520963
pytorch_model-00014-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4132626256e328619132b2f6228748ba4d33fa7a30c0bb0b4ba9b85eb3250af
3
+ size 809520963
pytorch_model-00015-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa0d079951f5684179147af0141d44880f1ead168593b8081b83b0cb94cf4b25
3
+ size 809520963
pytorch_model-00016-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ee7a48b4193bb2d1379076daf5901da1e8f27edeb008b2c3c310da4d68d0a3
3
+ size 809520963
pytorch_model-00017-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03ecb710daf7a1d4fe57dd7e2db5cc7bc153c38b7c2c06fba9e24a32ff18502a
3
+ size 809520963
pytorch_model-00018-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97d6eddecd48e542fd5a0c13b4249fda6ffcc1fd3ed6ac5709ad676ddf0f81a8
3
+ size 809520963
pytorch_model-00019-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59a6846610a343e97cf7cb35310d40f015ceeff8f758be989ef0c6bddcef4678
3
+ size 809520963
pytorch_model-00020-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16610b735ad1487465331c2fb59df272ba54f648edf6b15e767b09c2d5f62d06
3
+ size 809520963
pytorch_model-00021-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5777cfd2fe8474ba80b1990250adf7d051437df18d15fc4e588d696578e30b34
3
+ size 809520963
pytorch_model-00022-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:358afb41d4fe2d38a70b7f635815e29a4fea49331ba53c18d9fdf5d416ac24b5
3
+ size 809520963
pytorch_model-00023-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc563420e8b1b6ec97cb09991a6586bd014158f6a67bd1ea7088a5ba0456c712
3
+ size 809520963
pytorch_model-00024-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:762d7cc907c0c72a82f477468e6a23bd498c7363b9a5aa3bfb52c51d4913472a
3
+ size 809520963
pytorch_model-00025-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff9a57224ef95e3764b6b8c9108aaa84080838eca338228065ab29ea3ba9c7fd
3
+ size 809520963
pytorch_model-00026-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8116bfccc6972fd390993a9230c689d6274310b75536fd2a2d3f7be01c63c074
3
+ size 809520963
pytorch_model-00027-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db59894aa7ee782f06043998a00389f3dba6c163c91be8e877b5c07749016c7
3
+ size 809520963
pytorch_model-00028-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:983be7ab1625b1d6f65b3fa2d2911a2da9f30527e2f17c4a12c575393aefa8c8
3
+ size 809520963
pytorch_model-00029-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:135d40ca39798c4f84c7bd0ebeb9f4da36eca4f5f71987c6632985198de21897
3
+ size 809520963
pytorch_model-00030-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7b7b252d87399647a0e5b96355a1453157d6feda4e54b33d108383f52f3cd85
3
+ size 809520963
pytorch_model-00031-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9309b218cc2fcbdc3d31979c597b44320efa1f8f95b9b6e8b162bb3674d96a37
3
+ size 809520963
pytorch_model-00032-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2013662b1238417895d1a32023a207c7fb72ff92918072590cf8fc0157b5f6c1
3
+ size 809520963
pytorch_model-00033-of-00033.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4b50941e07161050d7c8c090d0ec91f939010af3b5f1b446821c6297a111493
3
+ size 524314060
pytorch_model.bin.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"weight_map": {"model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00033.bin", "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.mlp.down_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.mlp.up_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.input_layernorm.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00033.bin", "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00003-of-00033.bin", 
"model.layers.2.mlp.down_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.mlp.up_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.input_layernorm.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00033.bin", "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.mlp.down_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.mlp.up_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.input_layernorm.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00033.bin", "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.mlp.down_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.mlp.up_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.input_layernorm.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00033.bin", 
"model.layers.5.self_attn.q_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.mlp.down_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.mlp.up_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.input_layernorm.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00033.bin", "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.mlp.down_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.mlp.up_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.input_layernorm.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00033.bin", "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.mlp.down_proj.weight": 
"pytorch_model-00008-of-00033.bin", "model.layers.7.mlp.up_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.input_layernorm.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00033.bin", "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.mlp.down_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.mlp.up_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.input_layernorm.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00033.bin", "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.mlp.down_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.mlp.up_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.input_layernorm.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00033.bin", "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00011-of-00033.bin", 
"model.layers.10.self_attn.k_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.mlp.down_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.mlp.up_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.input_layernorm.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00033.bin", "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.mlp.down_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.mlp.up_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.input_layernorm.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00033.bin", "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.mlp.down_proj.weight": "pytorch_model-00013-of-00033.bin", 
"model.layers.12.mlp.up_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.input_layernorm.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00033.bin", "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.mlp.down_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.mlp.up_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.input_layernorm.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00033.bin", "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.mlp.down_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.mlp.up_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.input_layernorm.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00015-of-00033.bin", "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00016-of-00033.bin", 
"model.layers.15.self_attn.k_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.mlp.down_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.mlp.up_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.input_layernorm.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00016-of-00033.bin", "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.mlp.down_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.mlp.up_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.input_layernorm.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00017-of-00033.bin", "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.mlp.down_proj.weight": "pytorch_model-00018-of-00033.bin", 
"model.layers.17.mlp.up_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.input_layernorm.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00018-of-00033.bin", "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.mlp.down_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.mlp.up_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.input_layernorm.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00019-of-00033.bin", "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.mlp.down_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.mlp.up_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.input_layernorm.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00020-of-00033.bin", "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00021-of-00033.bin", 
"model.layers.20.self_attn.k_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.mlp.down_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.mlp.up_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.input_layernorm.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00021-of-00033.bin", "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.mlp.down_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.mlp.up_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.input_layernorm.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00022-of-00033.bin", "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.mlp.down_proj.weight": "pytorch_model-00023-of-00033.bin", 
"model.layers.22.mlp.up_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.input_layernorm.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00023-of-00033.bin", "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.mlp.down_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.mlp.up_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.input_layernorm.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00024-of-00033.bin", "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.mlp.down_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.mlp.up_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.input_layernorm.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00025-of-00033.bin", "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00026-of-00033.bin", 
"model.layers.25.self_attn.k_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.mlp.down_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.mlp.up_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.input_layernorm.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00026-of-00033.bin", "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.mlp.down_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.mlp.up_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.input_layernorm.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00027-of-00033.bin", "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.mlp.down_proj.weight": "pytorch_model-00028-of-00033.bin", 
"model.layers.27.mlp.up_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.input_layernorm.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00028-of-00033.bin", "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.mlp.down_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.mlp.up_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.input_layernorm.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00029-of-00033.bin", "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.mlp.down_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.mlp.up_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.input_layernorm.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00030-of-00033.bin", "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00031-of-00033.bin", 
"model.layers.30.self_attn.k_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.mlp.down_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.mlp.up_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.input_layernorm.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00031-of-00033.bin", "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.mlp.down_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.mlp.up_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.input_layernorm.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00032-of-00033.bin", "model.embed_tokens.weight": "pytorch_model-00033-of-00033.bin", "model.norm.weight": "pytorch_model-00033-of-00033.bin", "lm_head.weight": "pytorch_model-00033-of-00033.bin"}, "metadata": {"total_size": 13476835328}}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa299a0662fc3bf7ada4d816b1cb9fdeb472e9edf6c2ffbc7f00e1b5ff5ff968
3
+ size 499739
tokenizer_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "model_max_length": 2048,
5
+ "tokenizer_class": "LlamaTokenizer",
6
+ "unk_token": "<unk>",
7
+ "pad_token": "<pad>"
8
+ }