diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..5c385778ac3af1b7012a41c684bece16e4a8f62d --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "mistralai/Mistral-Large-Instruct-2411", + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 12288, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "model_type": "mistral", + "num_attention_heads": 96, + "num_hidden_layers": 88, + "num_key_value_heads": 8, + "pad_token_id": 748, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.3", + "unsloth_version": "2024.11.11", + "use_cache": true, + "vocab_size": 32768 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..4e736504e8bfe3d26ce19ed85c255345d561c29b --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "max_length": 131072, + "pad_token_id": 748, + "transformers_version": "4.46.3" +} diff --git a/model-00001-of-00051.safetensors b/model-00001-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76074b752aee95a1e45cbebb7f7eb6458402f357 --- /dev/null +++ b/model-00001-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42d92f6ac562e6b23f12c74827520d4b6c0fbe6f9b19c03b0770d10bcf0ffe8 +size 4932552432 diff --git a/model-00002-of-00051.safetensors b/model-00002-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3aeba1fa47619eb7f5f2fa4150dd476205552f98 --- /dev/null +++ b/model-00002-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b43cd78186ff2e0ad02691f27e6acba2c3c0048d8822180c8e95dc65cd0a44d +size 4831938528 diff --git a/model-00003-of-00051.safetensors b/model-00003-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7386ee69785115e99160c0388bd97b8082613788 --- /dev/null +++ b/model-00003-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80adc4c51653208573e9db3d63163cecbf1fd5517099a0cb8a607bb8f2c19fab +size 4882269816 diff --git a/model-00004-of-00051.safetensors b/model-00004-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..018848723ab990356605966a218711b9fab507ee --- /dev/null +++ b/model-00004-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7d554f8e74cf6b42ef88ee1fd03474b39b466fbc8ac6df2397c756f5cfd144a +size 4831889136 diff --git a/model-00005-of-00051.safetensors b/model-00005-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..715154d00f4e75376972b9aea02e6a401b26c529 --- /dev/null +++ b/model-00005-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8b1c9382f479a2c35d5798bcfb94f0edd3060dab6c67579e20696900dffd28b +size 4831938520 diff --git a/model-00006-of-00051.safetensors b/model-00006-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..041a3fd4bf6b2af13e4daf7e4922502de3b107be --- /dev/null +++ b/model-00006-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:046d8d7a83deffb812592454d983dd2240cf0aba3a7061f0b52a617d7d18764d +size 4831938528 diff --git a/model-00007-of-00051.safetensors b/model-00007-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57914735727127b9977a622a8928e35007d6d69e --- /dev/null +++ b/model-00007-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f60e301f745396b7479179925e2d2b941a137595d1f82a7aae8a1c9e67222910 +size 4882269832 diff --git a/model-00008-of-00051.safetensors b/model-00008-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac4655a868f413027da3aaa1b45f2881c9874187 --- /dev/null +++ b/model-00008-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380cd71c1cb1fd69b4924a76bfd3b760ef334f9509d24ba92d8abc60fec92db8 +size 4831889152 diff --git a/model-00009-of-00051.safetensors b/model-00009-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0827284cf5886a0bc2bb7c83c0ed2ddb3cec634c --- /dev/null +++ b/model-00009-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e46e59f69b03864046d110b76c04c27eef95431f6c9a639a84d9916e7117df2 +size 4831938536 diff --git a/model-00010-of-00051.safetensors b/model-00010-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ef4cac0a0f8af4653eff4832fc559012e21a5f7 --- /dev/null +++ b/model-00010-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33191523d7696e7419d08df2e884bb5a748d46fd46dfa69689b9ab8cd7217147 +size 4831938544 diff --git a/model-00011-of-00051.safetensors b/model-00011-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09ac135f462e9ad5ffd076c291f6fe318b603dcc --- /dev/null +++ b/model-00011-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23d633bb3ec29bf88ad204cff4a96347f5235a216509dba753deb2cfb254dad3 +size 4882269832 diff --git a/model-00012-of-00051.safetensors b/model-00012-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c104d041c2f5a944900fc4bcc8fdc04bc01ce61a --- /dev/null +++ b/model-00012-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43a821c901bc7afab06bbfee6f5a082e66ba961277fb123da346f4b35d998b64 +size 4831889152 diff --git a/model-00013-of-00051.safetensors b/model-00013-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a83e50381eac2fd47f171a99e2b9c485a4b22050 --- /dev/null +++ b/model-00013-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7407cc29efb65599c34261815fda052c197b63656ad8cd86ccb881137220cc83 +size 4831938536 diff --git a/model-00014-of-00051.safetensors b/model-00014-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2dcb3df5dcbc6e0d5b9bfd36bad6001a3a539896 --- /dev/null +++ b/model-00014-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:500bec08d9a35c0448f8eabcbe405336f890627811c04c8460445ca12fd3f72b +size 4831938544 diff --git a/model-00015-of-00051.safetensors b/model-00015-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4b9ac6e63532cee6165554c8b787e714a5dbdd8 --- /dev/null +++ b/model-00015-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac0fe612529e37d9388ad4873535853a1fe0be32cefde172ced76583396ccc5c +size 4882269832 diff --git a/model-00016-of-00051.safetensors b/model-00016-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a623afb3f68a18187929e6fd2ee95b4afb8a76d --- /dev/null +++ b/model-00016-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f68fbeb599b8b381f98dcad0977acac9925482485ba7b3700f24939532852bfe +size 4831889152 diff --git a/model-00017-of-00051.safetensors b/model-00017-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9f8c4525626cf1c8ca6eb9a34dcf3a3df9cd46a --- /dev/null +++ b/model-00017-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b7863f45b01357673c956f996d803d69727f0141b8ffd03bab0311e88c4f1dd +size 4831938536 diff --git a/model-00018-of-00051.safetensors b/model-00018-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..276c20a6eb694c73698086be8b9201a9a0ff4489 --- /dev/null +++ b/model-00018-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39c13e7518572244dc9e93d1089630c06f4c9029fbd58ded8b12115277362d24 +size 4831938544 diff --git a/model-00019-of-00051.safetensors b/model-00019-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..795e8a4fe20445ab152ab30464ee1d2fca2297bd --- /dev/null +++ b/model-00019-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6a366f8256fb5bcedfdb0611bc7645d562768b849685064187c0972fa12c2b5 +size 4882269832 diff --git a/model-00020-of-00051.safetensors b/model-00020-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca63d7e326c381d6f3b68620974439deb991033d --- /dev/null +++ b/model-00020-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1308f110e24f879ae251f4883a81f2ff1277e3164f648fd1a6cd152ab92215b2 +size 4831889152 diff --git a/model-00021-of-00051.safetensors b/model-00021-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2eb2b237490113c34831305967b5fcf8b612530c --- /dev/null +++ b/model-00021-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0493cb438c1e685cc7d9e02bbed5f407a985d3b840516e8cb20826f0ca9e651f +size 4831938536 diff --git a/model-00022-of-00051.safetensors b/model-00022-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b547849d66d3425f1086a6367b3104427318fbe4 --- /dev/null +++ b/model-00022-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:997dd7713b0fef96bfc093dcd5af2af5f728cc0414527f38315b3a2c06abffbd +size 4831938544 diff --git a/model-00023-of-00051.safetensors b/model-00023-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5c4f4f0fc76c54bd1c0e3e1aa45b7ccc7bf5344 --- /dev/null +++ b/model-00023-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24979f0c565f52d4bad962c8f084a51be136f8cae22d22d4c28d3092b86278bc +size 4882269832 diff --git a/model-00024-of-00051.safetensors b/model-00024-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1003d5cf3d8bc65dd94e68c108b0ece9a2cbed7b --- /dev/null +++ b/model-00024-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbfadc55a680f062e37ba037d947b5b469a87bd0c887241bc2a5d0dd1b95d947 +size 4831889152 diff --git a/model-00025-of-00051.safetensors b/model-00025-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eaf910ccc3df837af441da08a2342e232477b615 --- /dev/null +++ b/model-00025-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c76e0ade97c05f0dd492fec9ec493361daab711dc66d3c0aab656b23453a228 +size 4831938536 diff --git a/model-00026-of-00051.safetensors b/model-00026-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9069f79ed7443e86c6be5a73904226b2954c8a0 --- /dev/null +++ b/model-00026-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbaf346f7000b5f5ceb4250bfa82895db7afa83a775cd70d4a8e7bdf045ea20a +size 4831938544 diff --git a/model-00027-of-00051.safetensors b/model-00027-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8163195c7841d6ae641957b5e7525685042c52d --- /dev/null +++ b/model-00027-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9d83013ef3d32979bf9b423df44f980e4469eb0537a9a11861429ec172d4e6e +size 4882269832 diff --git a/model-00028-of-00051.safetensors b/model-00028-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9660a4a659e663bdf27bd16a668fdc5d4f25ed87 --- /dev/null +++ b/model-00028-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb3b9ca2708d9ba426395dea3a29c89deb3dfb07d288c4be878748e891b09036 +size 4831889152 diff --git a/model-00029-of-00051.safetensors b/model-00029-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..519b10beea6018f97435fd01620c2d41ce76910f --- /dev/null +++ b/model-00029-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:497f1b367090b4715b579fd3fe54cc4f41f0ce3dd4fac049e8de55b8b231955c +size 4831938536 diff --git a/model-00030-of-00051.safetensors b/model-00030-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d2a2464e1d77e8a5277e56bbd761ec91d7255e1 --- /dev/null +++ b/model-00030-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fb97017ee533a083443f49651d55ad9b8eda1db076219adcc1c7130aae2ab86 +size 4831938544 diff --git a/model-00031-of-00051.safetensors b/model-00031-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..767c9e72727a8f9b6cd070aac0bee4700721c8cd --- /dev/null +++ b/model-00031-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71ec77b32595d84fa32850aaf1737d1c09a788ab264653cb7051f33e053b771d +size 4882269832 diff --git a/model-00032-of-00051.safetensors b/model-00032-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fea1b5620361c7b4bf0c4de3437d1c109c952cde --- /dev/null +++ b/model-00032-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66d3b44283073113410da13e6c017c13cb086903b4f6a76d55fd520d09e976a4 +size 4831889152 diff --git a/model-00033-of-00051.safetensors b/model-00033-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..824390f7c6997a4813459d7a93f2090d1aa4bcb5 --- /dev/null +++ b/model-00033-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c003949967e792a0afcfd0dd5e4839ce38e660075d66e8cbecb281e4e41bfc +size 4831938536 diff --git a/model-00034-of-00051.safetensors b/model-00034-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..93bbeb8e70e8c8b28cce3f5cdfa4dea651d6eaca --- /dev/null +++ b/model-00034-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b95cc980450fb8fdf5db7c005723848787cb75a0a273f2f3ffb747f6e76f7a6 +size 4831938544 diff --git a/model-00035-of-00051.safetensors b/model-00035-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..635aa7ecc37b93498a4301fc0bac0b90394f2827 --- /dev/null +++ b/model-00035-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d7631936b5c647425875ea2b391fe5d7eef5a8b47baeb31c93b69d851c19b46 +size 4882269832 diff --git a/model-00036-of-00051.safetensors b/model-00036-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a3b2a9ae5c32899211997f9619a7e36391519b1 --- /dev/null +++ b/model-00036-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:508cd7180f09f51bb2d3ec8d1a50cd583f5c9b8729145a5e8c8f2d49147b2c13 +size 4831889152 diff --git a/model-00037-of-00051.safetensors b/model-00037-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4772957361213607922427f4716a9d262741f7d6 --- /dev/null +++ b/model-00037-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b90105ab811dc387bdcb528654f5b4f4d8e3617263c86e926e7fadca19504244 +size 4831938536 diff --git a/model-00038-of-00051.safetensors b/model-00038-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a465deabc08c00ac5c904898a0e76bbc2efaad3d --- /dev/null +++ b/model-00038-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4caa0b4214fb63ce87d91478435c3309d73a871a1f82ac379fa84950e30693be +size 4831938544 diff --git a/model-00039-of-00051.safetensors b/model-00039-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98391492c745899f8461207453d7c3fdbd2434da --- /dev/null +++ b/model-00039-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064593178f2adf6c555d5a2f7e8728065f7bf76b731bec8b4155de84e71a4257 +size 4882269832 diff --git a/model-00040-of-00051.safetensors b/model-00040-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfe50ebfbd93d2b7b18b8b5b67295cf66fb26d69 --- /dev/null +++ b/model-00040-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e5806b01703c633f51f837a4eb87b1a5ca91d1fce5e8614eba93ddf5d426cd +size 4831889152 diff --git a/model-00041-of-00051.safetensors b/model-00041-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a4adb7f7497924934d2594334ea4564e697683d --- /dev/null +++ b/model-00041-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab1bc80e4a9397d0a4257dd2a5eeff4d275e8b60795fc3aeaa7944588e94abd +size 4831938536 diff --git a/model-00042-of-00051.safetensors b/model-00042-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11a444d6ff6ce13d102dd5bcfbc93db10fe30f85 --- /dev/null +++ b/model-00042-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9f53fb043febe0a22f90de61bf5206d8c1499cb27fd37f7caadb583d5fef48a +size 4831938544 diff --git a/model-00043-of-00051.safetensors b/model-00043-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8be257a7daf9ee7f4bcbba67cee627590a364522 --- /dev/null +++ b/model-00043-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f2b6967fb46849902c0d77be949e87c2afa054d8b0982276ca2f73f0e080583 +size 4882269832 diff --git a/model-00044-of-00051.safetensors b/model-00044-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d03f24e8e1ddfa529513370a6f835266ce6477c3 --- /dev/null +++ b/model-00044-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da5206d87e7d130f91bea5e01aecb88a8aaf2d8b9459abad51153ac192f38aa4 +size 4831889152 diff --git a/model-00045-of-00051.safetensors b/model-00045-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91c08ae4fb66bc985646e20d067b61fcce5a7daf --- /dev/null +++ b/model-00045-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1e28395fefc0d79b4ad0bd24c6068fc29519bc020e5def8ca8a75a8a02107ee +size 4831938536 diff --git a/model-00046-of-00051.safetensors b/model-00046-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4a104895cb57e941944a511ddd5f1ce7cad87dc --- /dev/null +++ b/model-00046-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de7ae846b6e4329836a2a0748a9bee00b64c5b62bb4524d1960902654e4a3896 +size 4831938544 diff --git a/model-00047-of-00051.safetensors b/model-00047-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b832b5ef1c772cc9b9b7419b347a506c3b3761b9 --- /dev/null +++ b/model-00047-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8c4f0442232bc7027d433c932462cc30f224752f320d234f416512ff56b551d +size 4882269832 diff --git a/model-00048-of-00051.safetensors b/model-00048-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd067d68756f22941d348f6f58df588fbb189ad1 --- /dev/null +++ b/model-00048-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56aa1f2d733591e3909ab87b2f4b3c17256c79a02fe985fc86ec284f954e5397 +size 4831889152 diff --git a/model-00049-of-00051.safetensors b/model-00049-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf7229d12eb9bdc38957f66f76817ebd1bd2cf00 --- /dev/null +++ b/model-00049-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f3d4e51be9daf35438a320d4bdecb943d2505356a74a122bfda5aa25f23165 +size 4831938536 diff --git a/model-00050-of-00051.safetensors b/model-00050-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9f89ae46ed94cd899d556f2e53ce7ca5062edde --- /dev/null +++ b/model-00050-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:206fbe6e9a276a1b363e760238b4aa7bc8d02d8aea0e1fcc8e0a9437e7eecbd4 +size 4831938544 diff --git a/model-00051-of-00051.safetensors b/model-00051-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f081a1b458a5d2b1a76c86bdedd053dcea01a3f0 --- /dev/null +++ b/model-00051-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01258b8f1ba8aa7066f0bfa1d5ea6fbc53aed1bacdcf4732f04e2eec8614d5e0 +size 2919310112 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..43cb90c23f416f3bb649d50540706cf4384b2907 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,802 @@ +{ + "metadata": { + "total_size": 245220139008 + }, + "weight_map": { + "lm_head.weight": "model-00051-of-00051.safetensors", + "model.embed_tokens.weight": "model-00001-of-00051.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00051.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00051.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00051.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00051.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.10.input_layernorm.weight": "model-00007-of-00051.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00007-of-00051.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.11.input_layernorm.weight": "model-00007-of-00051.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00007-of-00051.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.12.input_layernorm.weight": "model-00008-of-00051.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00008-of-00051.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.input_layernorm.weight": "model-00009-of-00051.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00009-of-00051.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.14.input_layernorm.weight": "model-00009-of-00051.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00009-of-00051.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.15.input_layernorm.weight": "model-00010-of-00051.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00010-of-00051.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.16.input_layernorm.weight": "model-00010-of-00051.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00010-of-00051.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.17.input_layernorm.weight": "model-00011-of-00051.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00011-of-00051.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.18.input_layernorm.weight": "model-00011-of-00051.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00011-of-00051.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.19.input_layernorm.weight": "model-00012-of-00051.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00012-of-00051.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.2.input_layernorm.weight": "model-00002-of-00051.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00051.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.20.input_layernorm.weight": "model-00013-of-00051.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00013-of-00051.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.21.input_layernorm.weight": "model-00013-of-00051.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00013-of-00051.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.22.input_layernorm.weight": "model-00014-of-00051.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00014-of-00051.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.23.input_layernorm.weight": "model-00014-of-00051.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00014-of-00051.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.24.input_layernorm.weight": "model-00015-of-00051.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00015-of-00051.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.25.input_layernorm.weight": "model-00015-of-00051.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00015-of-00051.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.26.input_layernorm.weight": "model-00016-of-00051.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00016-of-00051.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.input_layernorm.weight": "model-00017-of-00051.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00017-of-00051.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.28.input_layernorm.weight": "model-00017-of-00051.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00017-of-00051.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.29.input_layernorm.weight": "model-00018-of-00051.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00018-of-00051.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.3.input_layernorm.weight": "model-00003-of-00051.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00003-of-00051.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.30.input_layernorm.weight": "model-00018-of-00051.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00018-of-00051.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.31.input_layernorm.weight": "model-00019-of-00051.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00019-of-00051.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.32.input_layernorm.weight": "model-00019-of-00051.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00019-of-00051.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.33.input_layernorm.weight": "model-00020-of-00051.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00020-of-00051.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.input_layernorm.weight": "model-00021-of-00051.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00021-of-00051.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.35.input_layernorm.weight": "model-00021-of-00051.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00021-of-00051.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.36.input_layernorm.weight": "model-00022-of-00051.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00022-of-00051.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.37.input_layernorm.weight": "model-00022-of-00051.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00022-of-00051.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.38.input_layernorm.weight": "model-00023-of-00051.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00023-of-00051.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.39.input_layernorm.weight": "model-00023-of-00051.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00023-of-00051.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.4.input_layernorm.weight": "model-00003-of-00051.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00051.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.40.input_layernorm.weight": "model-00024-of-00051.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00024-of-00051.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.input_layernorm.weight": "model-00025-of-00051.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00025-of-00051.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.42.input_layernorm.weight": "model-00025-of-00051.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00025-of-00051.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.43.input_layernorm.weight": "model-00026-of-00051.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00026-of-00051.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.44.input_layernorm.weight": "model-00026-of-00051.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00026-of-00051.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.45.input_layernorm.weight": "model-00027-of-00051.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00027-of-00051.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.46.input_layernorm.weight": "model-00027-of-00051.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00027-of-00051.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.47.input_layernorm.weight": "model-00028-of-00051.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00028-of-00051.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.input_layernorm.weight": "model-00029-of-00051.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00029-of-00051.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.49.input_layernorm.weight": "model-00029-of-00051.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00029-of-00051.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.5.input_layernorm.weight": "model-00004-of-00051.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00004-of-00051.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.50.input_layernorm.weight": "model-00030-of-00051.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00030-of-00051.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.51.input_layernorm.weight": "model-00030-of-00051.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00030-of-00051.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.52.input_layernorm.weight": "model-00031-of-00051.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00031-of-00051.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.53.input_layernorm.weight": "model-00031-of-00051.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00031-of-00051.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.54.input_layernorm.weight": "model-00032-of-00051.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00032-of-00051.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.input_layernorm.weight": "model-00033-of-00051.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00033-of-00051.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.56.input_layernorm.weight": "model-00033-of-00051.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00033-of-00051.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.57.input_layernorm.weight": "model-00034-of-00051.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00034-of-00051.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.58.input_layernorm.weight": "model-00034-of-00051.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00034-of-00051.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.59.input_layernorm.weight": "model-00035-of-00051.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00035-of-00051.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.6.input_layernorm.weight": "model-00005-of-00051.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00005-of-00051.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.60.input_layernorm.weight": "model-00035-of-00051.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00035-of-00051.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.61.input_layernorm.weight": "model-00036-of-00051.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00036-of-00051.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.input_layernorm.weight": "model-00037-of-00051.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00037-of-00051.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.63.input_layernorm.weight": "model-00037-of-00051.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00037-of-00051.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.64.input_layernorm.weight": "model-00038-of-00051.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00038-of-00051.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.65.input_layernorm.weight": "model-00038-of-00051.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00038-of-00051.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.66.input_layernorm.weight": "model-00039-of-00051.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00039-of-00051.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.67.input_layernorm.weight": "model-00039-of-00051.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00039-of-00051.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.68.input_layernorm.weight": "model-00040-of-00051.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00040-of-00051.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.input_layernorm.weight": "model-00041-of-00051.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00041-of-00051.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.7.input_layernorm.weight": "model-00005-of-00051.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00005-of-00051.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.70.input_layernorm.weight": "model-00041-of-00051.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00041-of-00051.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.71.input_layernorm.weight": "model-00042-of-00051.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00042-of-00051.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.72.input_layernorm.weight": "model-00042-of-00051.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00042-of-00051.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.73.input_layernorm.weight": "model-00043-of-00051.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00043-of-00051.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.74.input_layernorm.weight": "model-00043-of-00051.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00043-of-00051.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.75.input_layernorm.weight": "model-00044-of-00051.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00044-of-00051.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.input_layernorm.weight": "model-00045-of-00051.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00045-of-00051.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.77.input_layernorm.weight": "model-00045-of-00051.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00045-of-00051.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.78.input_layernorm.weight": "model-00046-of-00051.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00046-of-00051.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.79.input_layernorm.weight": "model-00046-of-00051.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00046-of-00051.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.8.input_layernorm.weight": "model-00006-of-00051.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00006-of-00051.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.80.input_layernorm.weight": "model-00047-of-00051.safetensors", + "model.layers.80.mlp.down_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.80.mlp.gate_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.80.mlp.up_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.80.post_attention_layernorm.weight": "model-00047-of-00051.safetensors", + "model.layers.80.self_attn.k_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.80.self_attn.o_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.80.self_attn.q_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.80.self_attn.v_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.81.input_layernorm.weight": "model-00047-of-00051.safetensors", + "model.layers.81.mlp.down_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.mlp.gate_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.mlp.up_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.post_attention_layernorm.weight": "model-00047-of-00051.safetensors", + "model.layers.81.self_attn.k_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.self_attn.o_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.self_attn.q_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.self_attn.v_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.82.input_layernorm.weight": "model-00048-of-00051.safetensors", + "model.layers.82.mlp.down_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.mlp.gate_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.mlp.up_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.post_attention_layernorm.weight": "model-00048-of-00051.safetensors", + "model.layers.82.self_attn.k_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.self_attn.o_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.self_attn.q_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.self_attn.v_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.input_layernorm.weight": "model-00049-of-00051.safetensors", + "model.layers.83.mlp.down_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.83.mlp.gate_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.mlp.up_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.post_attention_layernorm.weight": "model-00049-of-00051.safetensors", + "model.layers.83.self_attn.k_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.self_attn.o_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.self_attn.q_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.self_attn.v_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.84.input_layernorm.weight": "model-00049-of-00051.safetensors", + "model.layers.84.mlp.down_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.mlp.gate_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.mlp.up_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.post_attention_layernorm.weight": "model-00049-of-00051.safetensors", + "model.layers.84.self_attn.k_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.self_attn.o_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.self_attn.q_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.self_attn.v_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.85.input_layernorm.weight": "model-00050-of-00051.safetensors", + "model.layers.85.mlp.down_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.85.mlp.gate_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.85.mlp.up_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.85.post_attention_layernorm.weight": "model-00050-of-00051.safetensors", + "model.layers.85.self_attn.k_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.85.self_attn.o_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.85.self_attn.q_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.85.self_attn.v_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.86.input_layernorm.weight": "model-00050-of-00051.safetensors", + "model.layers.86.mlp.down_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.mlp.gate_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.mlp.up_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.post_attention_layernorm.weight": "model-00050-of-00051.safetensors", + "model.layers.86.self_attn.k_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.self_attn.o_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.self_attn.q_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.self_attn.v_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.87.input_layernorm.weight": "model-00051-of-00051.safetensors", + "model.layers.87.mlp.down_proj.weight": "model-00051-of-00051.safetensors", + "model.layers.87.mlp.gate_proj.weight": "model-00051-of-00051.safetensors", + "model.layers.87.mlp.up_proj.weight": "model-00051-of-00051.safetensors", + "model.layers.87.post_attention_layernorm.weight": "model-00051-of-00051.safetensors", + "model.layers.87.self_attn.k_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.87.self_attn.o_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.87.self_attn.q_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.87.self_attn.v_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.9.input_layernorm.weight": "model-00006-of-00051.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00006-of-00051.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00006-of-00051.safetensors", + "model.norm.weight": "model-00051-of-00051.safetensors" + } +}