AdaptLLM-med-v0
Browse files- config.json +21 -0
- generation_config.json +1 -0
- pytorch_model-00001-of-00033.bin +3 -0
- pytorch_model-00002-of-00033.bin +3 -0
- pytorch_model-00003-of-00033.bin +3 -0
- pytorch_model-00004-of-00033.bin +3 -0
- pytorch_model-00005-of-00033.bin +3 -0
- pytorch_model-00006-of-00033.bin +3 -0
- pytorch_model-00007-of-00033.bin +3 -0
- pytorch_model-00008-of-00033.bin +3 -0
- pytorch_model-00009-of-00033.bin +3 -0
- pytorch_model-00010-of-00033.bin +3 -0
- pytorch_model-00011-of-00033.bin +3 -0
- pytorch_model-00012-of-00033.bin +3 -0
- pytorch_model-00013-of-00033.bin +3 -0
- pytorch_model-00014-of-00033.bin +3 -0
- pytorch_model-00015-of-00033.bin +3 -0
- pytorch_model-00016-of-00033.bin +3 -0
- pytorch_model-00017-of-00033.bin +3 -0
- pytorch_model-00018-of-00033.bin +3 -0
- pytorch_model-00019-of-00033.bin +3 -0
- pytorch_model-00020-of-00033.bin +3 -0
- pytorch_model-00021-of-00033.bin +3 -0
- pytorch_model-00022-of-00033.bin +3 -0
- pytorch_model-00023-of-00033.bin +3 -0
- pytorch_model-00024-of-00033.bin +3 -0
- pytorch_model-00025-of-00033.bin +3 -0
- pytorch_model-00026-of-00033.bin +3 -0
- pytorch_model-00027-of-00033.bin +3 -0
- pytorch_model-00028-of-00033.bin +3 -0
- pytorch_model-00029-of-00033.bin +3 -0
- pytorch_model-00030-of-00033.bin +3 -0
- pytorch_model-00031-of-00033.bin +3 -0
- pytorch_model-00032-of-00033.bin +3 -0
- pytorch_model-00033-of-00033.bin +3 -0
- pytorch_model.bin.index.json +1 -0
- special_tokens_map.json +1 -0
- tokenizer.model +3 -0
- tokenizer_config.json +8 -0
config.json
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"LLaMAForCausalLM"
|
4 |
+
],
|
5 |
+
"bos_token_id": 0,
|
6 |
+
"eos_token_id": 1,
|
7 |
+
"hidden_act": "silu",
|
8 |
+
"hidden_size": 4096,
|
9 |
+
"intermediate_size": 11008,
|
10 |
+
"initializer_range": 0.02,
|
11 |
+
"max_sequence_length": 2048,
|
12 |
+
"model_type": "llama",
|
13 |
+
"num_attention_heads": 32,
|
14 |
+
"num_hidden_layers": 32,
|
15 |
+
"pad_token_id": 32000,
|
16 |
+
"rms_norm_eps": 1e-06,
|
17 |
+
"torch_dtype": "float16",
|
18 |
+
"transformers_version": "4.27.0.dev0",
|
19 |
+
"use_cache": true,
|
20 |
+
"vocab_size": 32001
|
21 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"_from_model_config": true, "bos_token_id": 0, "eos_token_id": 1, "pad_token_id": 0, "transformers_version": "4.27.0.dev0"}
|
pytorch_model-00001-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfa3fc237bbecb8982e67a92d410e1daa48c720a9d566e1c4c18b3e3890c71bd
|
3 |
+
size 809520963
|
pytorch_model-00002-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24146cba79a478ecea5d900870c75b248aee39afd5995e123cdb31be12a754aa
|
3 |
+
size 809520963
|
pytorch_model-00003-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:43a4ab801023b1acacb638877c74aed27bd9e12898571fce97af0f264d05f7de
|
3 |
+
size 809520963
|
pytorch_model-00004-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b2d6973e059cd01ca44a7a19c87b1a3eb8c1b2f4e82c6dc009abdf9d2f3b755
|
3 |
+
size 809520963
|
pytorch_model-00005-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d12b9adb06870196c9c33c6bc53b90faf767d4a34f0eca34910667431c3b1c4c
|
3 |
+
size 809520963
|
pytorch_model-00006-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee4188d6fcd05385e65a5137e00fdff80fb43df7f23e14db1990517a8a881932
|
3 |
+
size 809520963
|
pytorch_model-00007-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0eeb9db5cb455aa3c30ace5182ea655131a10d21e8621efd091771d9dd5ec3ac
|
3 |
+
size 809520963
|
pytorch_model-00008-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84afeffda180a5a5f893178e290c12a69aa86a6b8cf306e7152b8ad68ef8e07a
|
3 |
+
size 809520963
|
pytorch_model-00009-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0c48999b6911c0e2531e6c52fd100761bb97c8c903b0e46f0310346878bc099
|
3 |
+
size 809520963
|
pytorch_model-00010-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1dbc33e2ee3e5399a99d8e6456cb226914df88bc0ca9a70c8dc816fab026421c
|
3 |
+
size 809520963
|
pytorch_model-00011-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a01412018382124a7cef1363771dd3c9e2371b02fd2291c19b82caf8de8b180
|
3 |
+
size 809520963
|
pytorch_model-00012-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:548ba11c335a6b8193ad1a64baf9ac6537af66a971763fff2928e50eac7f59c4
|
3 |
+
size 809520963
|
pytorch_model-00013-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4af705eb4be81c0cc700be69ccc2c30f64f4dca5da97edfd3adfd2164c34f1f
|
3 |
+
size 809520963
|
pytorch_model-00014-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84f6d4d1e17f34524de3fa901642e048ce9434ed8ac62dd0339d6957e7c590dc
|
3 |
+
size 809520963
|
pytorch_model-00015-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b051151d1a160c0e5a506bbeb499f2596f8a43a9c78d83bbdf2628b7a2dcf41
|
3 |
+
size 809520963
|
pytorch_model-00016-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:79c0d5f3bf78378cf0fee2d31277b7658cf7691a992c3079435003db7e795735
|
3 |
+
size 809520963
|
pytorch_model-00017-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d8a2dd3e8a495f2f154bfa2388f6ebc52de27dfcbd075c264846f02db98c6ce
|
3 |
+
size 809520963
|
pytorch_model-00018-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85d1792e4ffe493d73f204b26d8e95c96a91cbcca081cc7c8d7702c16b3f8988
|
3 |
+
size 809520963
|
pytorch_model-00019-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5df54ebad10b28bd0d865e214e8c1b6a860fdf1c144cbff635604b7e0e535420
|
3 |
+
size 809520963
|
pytorch_model-00020-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:da1aa4c2f6c0fcc651af7e0a1328e3813d8f6fc6e3ad01fb68afee8d357e6d6e
|
3 |
+
size 809520963
|
pytorch_model-00021-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0690ce2c8e0fb80e267c7964adfbb14ea60fc38a9b75ad1e575658160bb5af50
|
3 |
+
size 809520963
|
pytorch_model-00022-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a9a2286a812fd3dae3138c616aa8f340725a6d2b4e0ea4097d18dc1464af561
|
3 |
+
size 809520963
|
pytorch_model-00023-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1104557819a42e487d5ee4051339d28f80aacc518644ecbdee8abb4a7d01d328
|
3 |
+
size 809520963
|
pytorch_model-00024-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:041135a90bf937d0d3c00dd536160569e53f921d8d941e981c531b21bdad47a1
|
3 |
+
size 809520963
|
pytorch_model-00025-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1f6674f85d535eb2d80d370c991fbcdd1ae8fe8a79947a16d898aba1cc0c1a0
|
3 |
+
size 809520963
|
pytorch_model-00026-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cb846aedb10220d05979eee91be214cde6d6c59441be65edf90a7036827ba9b
|
3 |
+
size 809520963
|
pytorch_model-00027-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab91db96ed9bb22fe4afbc76750608d71eec6a1ec28d65e562dc5e82f058cf9f
|
3 |
+
size 809520963
|
pytorch_model-00028-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ec4a1561691311072407da847d933e7ac826f7b7d471b7384d876aa3dd5d6cb
|
3 |
+
size 809520963
|
pytorch_model-00029-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f769ff6cca6fc7892e6c78e5938694f1908346800bb724bb338bb54020b95a07
|
3 |
+
size 809520963
|
pytorch_model-00030-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:157fd26afac79204fcf0964270b4bc54d2a0655a9b008eeede2c85b5caa6f703
|
3 |
+
size 809520963
|
pytorch_model-00031-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c94c0f7d851aa4b3479459cbf760cb7b1740508d422d0f4ce0efec9361729b4
|
3 |
+
size 809520963
|
pytorch_model-00032-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc45d3e8518655ed9b78dfe5766dd5ace7048f8959621b4e5c51e609bd772570
|
3 |
+
size 809520963
|
pytorch_model-00033-of-00033.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5fe79967f185823af08d57640c33cb55464d86895b514435cc2ebaf61d73a9d1
|
3 |
+
size 524314060
|
pytorch_model.bin.index.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"weight_map": {"model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00033.bin", "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00033.bin", "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.mlp.down_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.mlp.up_proj.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.input_layernorm.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00002-of-00033.bin", "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00033.bin", "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.mlp.down_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.mlp.up_proj.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.input_layernorm.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00003-of-00033.bin", "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00033.bin", "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.mlp.down_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.mlp.up_proj.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.input_layernorm.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00004-of-00033.bin", "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00033.bin", "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.mlp.down_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.mlp.up_proj.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.input_layernorm.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00005-of-00033.bin", "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00033.bin", "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.mlp.down_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.mlp.up_proj.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.input_layernorm.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00006-of-00033.bin", "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00033.bin", "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.mlp.down_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.mlp.up_proj.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.input_layernorm.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00007-of-00033.bin", "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00033.bin", "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.mlp.down_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.mlp.up_proj.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.input_layernorm.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00008-of-00033.bin", "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00033.bin", "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.mlp.down_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.mlp.up_proj.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.input_layernorm.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00009-of-00033.bin", "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00033.bin", "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.mlp.down_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.mlp.up_proj.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.input_layernorm.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00010-of-00033.bin", "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00033.bin", "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.mlp.down_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.mlp.up_proj.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.input_layernorm.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00011-of-00033.bin", "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00033.bin", "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.mlp.down_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.mlp.up_proj.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.input_layernorm.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00012-of-00033.bin", "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00033.bin", "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.mlp.down_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.mlp.up_proj.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.input_layernorm.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00013-of-00033.bin", "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00033.bin", "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.mlp.down_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.mlp.up_proj.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.input_layernorm.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00014-of-00033.bin", "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00033.bin", "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.mlp.down_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.mlp.up_proj.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.input_layernorm.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00015-of-00033.bin", "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00015-of-00033.bin", "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.mlp.down_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.mlp.up_proj.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.input_layernorm.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00016-of-00033.bin", "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00016-of-00033.bin", "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.mlp.down_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.mlp.up_proj.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.input_layernorm.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00017-of-00033.bin", "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00017-of-00033.bin", "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.mlp.down_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.mlp.up_proj.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.input_layernorm.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00018-of-00033.bin", "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00018-of-00033.bin", "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.mlp.down_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.mlp.up_proj.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.input_layernorm.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00019-of-00033.bin", "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00019-of-00033.bin", "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.mlp.down_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.mlp.up_proj.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.input_layernorm.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00020-of-00033.bin", "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00020-of-00033.bin", "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.mlp.down_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.mlp.up_proj.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.input_layernorm.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00021-of-00033.bin", "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00021-of-00033.bin", "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.mlp.down_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.mlp.up_proj.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.input_layernorm.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00022-of-00033.bin", "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00022-of-00033.bin", "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.mlp.down_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.mlp.up_proj.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.input_layernorm.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00023-of-00033.bin", "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00023-of-00033.bin", "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.mlp.down_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.mlp.up_proj.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.input_layernorm.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00024-of-00033.bin", "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00024-of-00033.bin", "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.mlp.down_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.mlp.up_proj.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.input_layernorm.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00025-of-00033.bin", "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00025-of-00033.bin", "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.mlp.down_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.mlp.up_proj.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.input_layernorm.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00026-of-00033.bin", "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00026-of-00033.bin", "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.mlp.down_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.mlp.up_proj.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.input_layernorm.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00027-of-00033.bin", "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00027-of-00033.bin", "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.mlp.down_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.mlp.up_proj.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.input_layernorm.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00028-of-00033.bin", "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00028-of-00033.bin", "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.mlp.down_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.mlp.up_proj.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.input_layernorm.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00029-of-00033.bin", "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00029-of-00033.bin", "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.mlp.down_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.mlp.up_proj.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.input_layernorm.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00030-of-00033.bin", "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00030-of-00033.bin", "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.mlp.down_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.mlp.up_proj.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.input_layernorm.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00031-of-00033.bin", "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00031-of-00033.bin", "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.mlp.down_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.mlp.up_proj.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.input_layernorm.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00032-of-00033.bin", "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00032-of-00033.bin", "model.embed_tokens.weight": "pytorch_model-00033-of-00033.bin", "model.norm.weight": "pytorch_model-00033-of-00033.bin", "lm_head.weight": "pytorch_model-00033-of-00033.bin"}, "metadata": {"total_size": 13476835328}}
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa299a0662fc3bf7ada4d816b1cb9fdeb472e9edf6c2ffbc7f00e1b5ff5ff968
|
3 |
+
size 499739
|
tokenizer_config.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"eos_token": "</s>",
|
4 |
+
"model_max_length": 2048,
|
5 |
+
"tokenizer_class": "LlamaTokenizer",
|
6 |
+
"unk_token": "<unk>",
|
7 |
+
"pad_token": "<pad>"
|
8 |
+
}
|