diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..cd71fd1e9fcc33f99f8e98b82027aef0ad1d4a85 --- /dev/null +++ b/config.json @@ -0,0 +1,28 @@ +{ + "_name_or_path": "model_codellama_70b_multiplt_lua/checkpoint-131", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 16384, + "model_type": "llama", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.39.0.dev0", + "use_cache": false, + "vocab_size": 32016 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d84663b8bd0a737e3299f3d2b4be2fe3cd2a0333 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.39.0.dev0" +} diff --git a/model-00001-of-00061.safetensors b/model-00001-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f79be9a88aba51c40854a1d498351d0d2fd65f1a --- /dev/null +++ b/model-00001-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a69dfd5ee1a06662beff820829e14382be8d2346d57ef20ffc0ececb5c00a01f +size 4807263728 diff --git a/model-00002-of-00061.safetensors b/model-00002-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04d95ba29f121d08e2c068e5853c70efe925c9ca --- /dev/null +++ b/model-00002-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a35b073bef535974fea6b159366c2f1cd51c7a757a279b94fc1034ce3792afec +size 4630578440 diff --git a/model-00003-of-00061.safetensors b/model-00003-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82f8251f34c6a9eccc220624c25bca30c0fff210 --- /dev/null +++ b/model-00003-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6120a71a99f6e6ca3a19ac36fdf037d582afcaf9a6d4e18db3fab1386467376f +size 4362142864 diff --git a/model-00004-of-00061.safetensors b/model-00004-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3dcfaa8eefad92d07cd4dbcba17e0aa093a6b17 --- /dev/null +++ b/model-00004-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a27af53eb183acdb28402e962f5686287625866949c0f6381c6ea43914cbe880 +size 4966188864 diff --git a/model-00005-of-00061.safetensors b/model-00005-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a2d7315a28017b507409c50e48029426e6b4e8c --- /dev/null +++ b/model-00005-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5b0a82f93823ea6ae32692d4532c35edc617b64bb98d62616f40a512fda6ef4 +size 4362142864 diff --git a/model-00006-of-00061.safetensors b/model-00006-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c51573d25f81abfc50cf0e483ad7fac4abde3216 --- /dev/null +++ b/model-00006-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5d3fd5cd0d0b84ce8ff0b36c8dcd78b04a7ee2dad90ccc73c559e04673d20b2 +size 4362142864 diff --git a/model-00007-of-00061.safetensors b/model-00007-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..892cd522817fc9a5a1146e6a7b948d1fd384d49b --- /dev/null +++ b/model-00007-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8419d8c3e4f52e5ba928dc254aa7b35bf506e8a94c09a7dc0bb514a3251f5c22 +size 4966188864 diff --git a/model-00008-of-00061.safetensors b/model-00008-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a87fb884823721c9642af6c9b393365720a4176e --- /dev/null +++ b/model-00008-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccac498898aab041b1406f79d03031ddfe8a807b669f0e572fbcf2cbfe48dd21 +size 4362142880 diff --git a/model-00009-of-00061.safetensors b/model-00009-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdd761ece4839f641778e9aad10853c96bc1ed05 --- /dev/null +++ b/model-00009-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b18c38d504bcc23e856de38819179b57c96eb3d8741bde27d51d9058067b51e7 +size 4362142872 diff --git a/model-00010-of-00061.safetensors b/model-00010-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c005fc5b85d0cef894e6bbd7988dae19c10b9a13 --- /dev/null +++ b/model-00010-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6110bc405abace0b5aa31b517b6a691328207c7554db99b78540584729dcafe5 +size 4966188880 diff --git a/model-00011-of-00061.safetensors b/model-00011-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c40dd4d495fa50f52bc34d7e023e372d63bc1ec9 --- /dev/null +++ b/model-00011-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:891ff14f6a9b6f51a8cf1f2a7ec577b8259e88b031d1d8701a00ce7efc5422be +size 4362142872 diff --git a/model-00012-of-00061.safetensors b/model-00012-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f49d8af8b6f1b8ffee42b828c2a5c004e9afe21 --- /dev/null +++ b/model-00012-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0ec55825b7b80581988432d2e5787fa21f727b5a8945da2f8aeae65e72605bf +size 4362142872 diff --git a/model-00013-of-00061.safetensors b/model-00013-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7551aa06ea3649e0e1083f6a29842268f85d4e00 --- /dev/null +++ b/model-00013-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ee7ef79bfc887ca08d41476db7ae6966e6ee584207cc616b82a5349582d2f0d +size 4966188880 diff --git a/model-00014-of-00061.safetensors b/model-00014-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea0a86765f659488d9958c319ac42d8bdfdb410f --- /dev/null +++ b/model-00014-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6904778922b2eda31cc332e8ac94e02dbc9b0be424d9704c5899bd11a3cd7f51 +size 4362142872 diff --git a/model-00015-of-00061.safetensors b/model-00015-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4a7374bab1464f198053dc08b24b95f237b54a9 --- /dev/null +++ b/model-00015-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:210ec45aff1a01af5d3c1cbe34cf93cad28adabba40bd4e9b9d5834c60aca43a +size 4362142872 diff --git a/model-00016-of-00061.safetensors b/model-00016-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09b917fc441a10ad7c52e6e4dda87117bb3f4f79 --- /dev/null +++ b/model-00016-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d3c0b85b2a95c5be9b73ae18e80ee3765b4f1406d4d1ce07eae4fb8553cc64 +size 4966188880 diff --git a/model-00017-of-00061.safetensors b/model-00017-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b38be2b6f46ac48f03b4632e97b664abe533e468 --- /dev/null +++ b/model-00017-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a29170b5b83960a49ba551c86f3b8acdd9541e96925eb86a32af78ff421c8c5 +size 4362142872 diff --git a/model-00018-of-00061.safetensors b/model-00018-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c513f73cef5b119fe096921bce5ef48478917ed --- /dev/null +++ b/model-00018-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb8dad3553a77284e771a2ddbac0029f4214247f2c70bc2f68a9461298bba53b +size 4362142872 diff --git a/model-00019-of-00061.safetensors b/model-00019-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3595c2b37d6d01610d85efe21d3ac04c8351426 --- /dev/null +++ b/model-00019-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f916435467fbb3f10ab3b87b64669b504be6174557a6b87f41b8affb5c9dc375 +size 4966188880 diff --git a/model-00020-of-00061.safetensors b/model-00020-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dc1ce4e860704acaa017619fdc0f653e57133fe1 --- /dev/null +++ b/model-00020-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f911b3689de5e85c2dbca6e33ef6ec6cfd7471d419bd5305d24b71a25f675ba +size 4362142872 diff --git a/model-00021-of-00061.safetensors b/model-00021-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..775959ee40bc354a6796b4d7724cbb111409bd8b --- /dev/null +++ b/model-00021-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:571a79312f8bf576844a29a5342b0d86a882dbcd474b01857fc4329a2baa7143 +size 4362142872 diff --git a/model-00022-of-00061.safetensors b/model-00022-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d26f0aeaf4ad6420934e8d8e938c3b056dcde890 --- /dev/null +++ b/model-00022-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e6fe66ac1a70217075c1697ed3b37f617233693f83b8b957bca8c7f1afe052 +size 4966188880 diff --git a/model-00023-of-00061.safetensors b/model-00023-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3246171659243bd72f5df1bfe0355661f4026dfe --- /dev/null +++ b/model-00023-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dc8d2743671482bdf0189910358cd0a0d2a6692ea85e1fdadbaa4a496e770fd +size 4362142872 diff --git a/model-00024-of-00061.safetensors b/model-00024-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..abba1441feedc45a2d45de84e040233c2976620c --- /dev/null +++ b/model-00024-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54df69f00e3155c435a6a0bf90b9893d7461277c4560a05d0b744441f513671d +size 4362142872 diff --git a/model-00025-of-00061.safetensors b/model-00025-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a9d6edd8b93ba9c0f368b14caa1a328afc32401 --- /dev/null +++ b/model-00025-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ee47d54e701888290af8bb96dee14361479bced396bd0d848def8cd9e66c9bd +size 4966188880 diff --git a/model-00026-of-00061.safetensors b/model-00026-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5fab37dec95b8109c3c878da5b4136f4bc89cd3 --- /dev/null +++ b/model-00026-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74e9f7899feadd86e11e26bd7dac67dea8c771bf1641dd08759582c404c72c7 +size 4362142872 diff --git a/model-00027-of-00061.safetensors b/model-00027-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f8d6a30351c971e28c802c4632073f35b41faba --- /dev/null +++ b/model-00027-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15dd973890679b7981941a6f713b84081c6c8a7c5cd816cdf9130bded7ac48e9 +size 4362142872 diff --git a/model-00028-of-00061.safetensors b/model-00028-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d1a0986e10a9fd26394c1e780860f370872be46 --- /dev/null +++ b/model-00028-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da5eff7ee8ee0c88402d96f7d061cde78c8ca8cdaff85658bf9a42dc14795d54 +size 4966188880 diff --git a/model-00029-of-00061.safetensors b/model-00029-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..500ffc7bdefaa5fedca0239dc5d7b156b0d3dccb --- /dev/null +++ b/model-00029-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44e32d45f872b736a3f9a49c2a9ef12c04fc2018228d8ccdcf0b17306e91e1d8 +size 4362142872 diff --git a/model-00030-of-00061.safetensors b/model-00030-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b95a2a822d77b9a67f8c691226b5a8d41f4c6d3 --- /dev/null +++ b/model-00030-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3516301f2b35e9fd041eb8aff2b388e007e13ad1d4857097386ad80a2efc9597 +size 4362142872 diff --git a/model-00031-of-00061.safetensors b/model-00031-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aacf5d38dfd733f8e3d1ef87928e6f7ff5bfd7b6 --- /dev/null +++ b/model-00031-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608fa0daea94b8ad11d1681b95b5fe5118554081d7f348f6c74714e3f78b6d11 +size 4966188880 diff --git a/model-00032-of-00061.safetensors b/model-00032-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..100ec5e58a2d3de4dc3d044cc1e9c3f30ff57661 --- /dev/null +++ b/model-00032-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7a7e561484b4cb6e0154928ae894ebfe04fb2a6652f4c2c64eba31c2467541b +size 4362142872 diff --git a/model-00033-of-00061.safetensors b/model-00033-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7691bcae4fe971b2737e09824c571b31357bd0e --- /dev/null +++ b/model-00033-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c73f389a99980e6e0286ff855103f21bbba3380dae427a3d58899faec89c184f +size 4362142872 diff --git a/model-00034-of-00061.safetensors b/model-00034-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2bc54155247d8fc4e3fe69f9e539b5376cd92b3a --- /dev/null +++ b/model-00034-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22f2497e84d8482108db7a090d0efa3648e530e4ac2f0a49960daa3c7319ab53 +size 4966188880 diff --git a/model-00035-of-00061.safetensors b/model-00035-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2c9ef125bf196ec3e8feff53cf94ebbe4488e969 --- /dev/null +++ b/model-00035-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c92cd2bc66fd820eb6ed84259dd9f63dec70fc06dd6815d0d604641cc303a73 +size 4362142872 diff --git a/model-00036-of-00061.safetensors b/model-00036-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20ebc31eed9cf470e72054ce707e80588cbe7780 --- /dev/null +++ b/model-00036-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241985ef914a2019085434e8d0cda22f0107f061cffc63c49b2df372f98b1bc5 +size 4362142872 diff --git a/model-00037-of-00061.safetensors b/model-00037-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..53deee343e7c25cbb2d125a69fd17f9cc38493ef --- /dev/null +++ b/model-00037-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7970a1984a6fa64c200ec5571e3301f3ae58ec396fdf49729a7c44094f23e722 +size 4966188880 diff --git a/model-00038-of-00061.safetensors b/model-00038-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06985c76b772a1f10a34a3c85d471dd0694403f9 --- /dev/null +++ b/model-00038-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f188dd289fd3d00713f23e4a941d55ee7216185e80fbf2a671b072c4dfd113b +size 4362142872 diff --git a/model-00039-of-00061.safetensors b/model-00039-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..696f1ff87057e850d988176b07c17baddb27301f --- /dev/null +++ b/model-00039-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a208ff3a2906dc226a25cef17b16d18e3d0877ebbbf1fe2eeeb22c82dba17c +size 4362142872 diff --git a/model-00040-of-00061.safetensors b/model-00040-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..324b3d39e0245b2212411d816608d875e8f9183d --- /dev/null +++ b/model-00040-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d213ac6d7c8e9ad108ced9eb275df4f19119af06940c7aceb7678e05ce764d +size 4966188880 diff --git a/model-00041-of-00061.safetensors b/model-00041-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b9569fcba880241d77c270a9fe1828bcd8acb60 --- /dev/null +++ b/model-00041-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e2cc770c4772b37d1a0b7668609004227e3da355bb879bbcc3a3aa03badfd46 +size 4362142872 diff --git a/model-00042-of-00061.safetensors b/model-00042-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..615fd14ded5d40dde69597c974af7ab4d4479a37 --- /dev/null +++ b/model-00042-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c5396335263c6114b9df22638b6b8aa36139f65fff81b571e9c94b392322f0b +size 4362142872 diff --git a/model-00043-of-00061.safetensors b/model-00043-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..012ec08c0a1ba31a1f45de026c10bd04a5ac1320 --- /dev/null +++ b/model-00043-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee075e925c2acc20e7373237021f00d6140eabda29e31b0a56022b9dbc1da910 +size 4966188880 diff --git a/model-00044-of-00061.safetensors b/model-00044-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da0e549c8545442d6c18f917a91c022c27879aaa --- /dev/null +++ b/model-00044-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c4c3a39d833464496b23f22dc373ad14ad97c5fe038c65f774746325c235828 +size 4362142872 diff --git a/model-00045-of-00061.safetensors b/model-00045-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c56593d8a5a36bfcf11b5b76664b2dec67b499a8 --- /dev/null +++ b/model-00045-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88097fdd95d4727411f227c42c5c7a5bab1e0ec907c3fd9b5624802b45284227 +size 4362142872 diff --git a/model-00046-of-00061.safetensors b/model-00046-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4754f564f8ede8eb9f5b13d6e22f0ef8d48f9888 --- /dev/null +++ b/model-00046-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d63f07ebd61815c83bc24be6f1a0ec9fb9d1d182923fc0a682a9484891541588 +size 4966188880 diff --git a/model-00047-of-00061.safetensors b/model-00047-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5176fc6d777e2d5a01c8b49a723c8123beec361 --- /dev/null +++ b/model-00047-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddbd5054b860749d587f59998fbccd77457f2d204c9d002a204710fdc587680b +size 4362142872 diff --git a/model-00048-of-00061.safetensors b/model-00048-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9472e7b554447c8bca279f63990a244eb9f3e78 --- /dev/null +++ b/model-00048-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:174e62b6ef109f6ba55e387c2e7375dff5a553455f002dfe879149d182797840 +size 4362142872 diff --git a/model-00049-of-00061.safetensors b/model-00049-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9bf162ac453e0150548bc1b3e1a50fb929a2768 --- /dev/null +++ b/model-00049-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cefb833a9fa01a9af3d35a50b6598aefb562779098928923a98952ab5512d9a0 +size 4966188880 diff --git a/model-00050-of-00061.safetensors b/model-00050-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f04c455fbd74d30c72c638e4bb2aeb30a284ba1f --- /dev/null +++ b/model-00050-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf272f607c70d289f267b654de7f6a1d1731db43ef230358da05e000f3ba6f3d +size 4362142872 diff --git a/model-00051-of-00061.safetensors b/model-00051-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8cefaf721330900263387dd88c74dfcdbede9262 --- /dev/null +++ b/model-00051-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a3b698775ccb2b5b22f5d3e15cdba713b70669906137c2357656db69c995bbc +size 4362142872 diff --git a/model-00052-of-00061.safetensors b/model-00052-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19f68768c579f5d70b983ab07d3181ab3b5eb0ca --- /dev/null +++ b/model-00052-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f87603d59902bf27b24c312a9fb865e21163dac3075f45e6f2fdddb8004c8147 +size 4966188880 diff --git a/model-00053-of-00061.safetensors b/model-00053-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6b079ac693fa4a337efe6891e9db5e3fabaaa63 --- /dev/null +++ b/model-00053-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12fb45e2074fce77b0df4f38323701c33e8223c966a74a9fe6a919173b121422 +size 4362142872 diff --git a/model-00054-of-00061.safetensors b/model-00054-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87c70df7b164d136565be83de0fe20c6f634fde4 --- /dev/null +++ b/model-00054-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f8f6c0d669a42cbb110c876886f45f4f0c285a81f8be8cf02e7f4d428a8ace1 +size 4362142872 diff --git a/model-00055-of-00061.safetensors b/model-00055-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c4bee52d840894def4b36223a1d0308be798678b --- /dev/null +++ b/model-00055-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc66230a75d781dc1c9c905c0824b4db6273c9b84f6df11620ad30747874c1b +size 4966188880 diff --git a/model-00056-of-00061.safetensors b/model-00056-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..553486ecde6785a46d562e205951aad4db1105af --- /dev/null +++ b/model-00056-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e600d4ca5e8aeeae294d8c66def4ea229227c872f92f13781a673c8373f31246 +size 4362142872 diff --git a/model-00057-of-00061.safetensors b/model-00057-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbc65c8bac206257f4fe55a59026e46dfc8793b0 --- /dev/null +++ b/model-00057-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f548b8e9744897aab83f0b2a4e5646f9e3b708f4d156733e8cdea49e867e6ad3 +size 4362142872 diff --git a/model-00058-of-00061.safetensors b/model-00058-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d6b702eeafa2d69756b47b32116df198adcfc01b --- /dev/null +++ b/model-00058-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe2eef906903301b635bb71b754911a405a647f35595c9c9442f898d5c059606 +size 4966188880 diff --git a/model-00059-of-00061.safetensors b/model-00059-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c5db4c9bb8da3c854fbf2248714fef5d172532e0 --- /dev/null +++ b/model-00059-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcfd041276589386d39a8347dda431d12babcaca6077c05fe982c081722e1c62 +size 4362142872 diff --git a/model-00060-of-00061.safetensors b/model-00060-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25bf806555c2244a4fad54748c1672ca9b80ec95 --- /dev/null +++ b/model-00060-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed95b79233ca1c85b2fa78698e99dbd30f18af4b55e9b5806c09b764999c8c8 +size 4362142872 diff --git a/model-00061-of-00061.safetensors b/model-00061-of-00061.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..418ffcaa5f12ca5c0767240f3459477442563b13 --- /dev/null +++ b/model-00061-of-00061.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ce3d2afa610317849df2f5525c33fdcf37e26c7d283331d83e2cfe7c2fa6e9 +size 1988723248 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..a2cd9f4af41bc127695ec4234e15f5e55e309477 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,730 @@ +{ + "metadata": { + "total_size": 275907641344 + }, + "weight_map": { + "lm_head.weight": "model-00061-of-00061.safetensors", + "model.embed_tokens.weight": "model-00001-of-00061.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00061.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00061.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00061.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00061.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00061.safetensors", + "model.layers.10.input_layernorm.weight": "model-00009-of-00061.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00009-of-00061.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.11.input_layernorm.weight": "model-00010-of-00061.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00010-of-00061.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00009-of-00061.safetensors", + "model.layers.12.input_layernorm.weight": "model-00010-of-00061.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00010-of-00061.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.13.input_layernorm.weight": "model-00011-of-00061.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00011-of-00061.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00010-of-00061.safetensors", + "model.layers.14.input_layernorm.weight": "model-00012-of-00061.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00012-of-00061.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00011-of-00061.safetensors", + "model.layers.15.input_layernorm.weight": "model-00013-of-00061.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00013-of-00061.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00012-of-00061.safetensors", + "model.layers.16.input_layernorm.weight": "model-00013-of-00061.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00013-of-00061.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.17.input_layernorm.weight": "model-00014-of-00061.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00014-of-00061.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00013-of-00061.safetensors", + "model.layers.18.input_layernorm.weight": "model-00015-of-00061.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00015-of-00061.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00014-of-00061.safetensors", + "model.layers.19.input_layernorm.weight": "model-00016-of-00061.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00016-of-00061.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00015-of-00061.safetensors", + "model.layers.2.input_layernorm.weight": "model-00003-of-00061.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00003-of-00061.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00061.safetensors", + "model.layers.20.input_layernorm.weight": "model-00016-of-00061.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00016-of-00061.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.21.input_layernorm.weight": "model-00017-of-00061.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00017-of-00061.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00016-of-00061.safetensors", + "model.layers.22.input_layernorm.weight": "model-00018-of-00061.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00018-of-00061.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00017-of-00061.safetensors", + "model.layers.23.input_layernorm.weight": "model-00019-of-00061.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00019-of-00061.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00018-of-00061.safetensors", + "model.layers.24.input_layernorm.weight": "model-00019-of-00061.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00019-of-00061.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.25.input_layernorm.weight": "model-00020-of-00061.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00020-of-00061.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00019-of-00061.safetensors", + "model.layers.26.input_layernorm.weight": "model-00021-of-00061.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00021-of-00061.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00020-of-00061.safetensors", + "model.layers.27.input_layernorm.weight": "model-00022-of-00061.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00022-of-00061.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00021-of-00061.safetensors", + "model.layers.28.input_layernorm.weight": "model-00022-of-00061.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00022-of-00061.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.29.input_layernorm.weight": "model-00023-of-00061.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00023-of-00061.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00022-of-00061.safetensors", + "model.layers.3.input_layernorm.weight": "model-00004-of-00061.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00061.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00003-of-00061.safetensors", + "model.layers.30.input_layernorm.weight": "model-00024-of-00061.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00024-of-00061.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00023-of-00061.safetensors", + "model.layers.31.input_layernorm.weight": "model-00025-of-00061.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00025-of-00061.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00024-of-00061.safetensors", + "model.layers.32.input_layernorm.weight": "model-00025-of-00061.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00025-of-00061.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.33.input_layernorm.weight": "model-00026-of-00061.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00026-of-00061.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00025-of-00061.safetensors", + "model.layers.34.input_layernorm.weight": "model-00027-of-00061.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00027-of-00061.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00026-of-00061.safetensors", + "model.layers.35.input_layernorm.weight": "model-00028-of-00061.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00028-of-00061.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00027-of-00061.safetensors", + "model.layers.36.input_layernorm.weight": "model-00028-of-00061.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00028-of-00061.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.37.input_layernorm.weight": "model-00029-of-00061.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00029-of-00061.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00028-of-00061.safetensors", + "model.layers.38.input_layernorm.weight": "model-00030-of-00061.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00030-of-00061.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00029-of-00061.safetensors", + "model.layers.39.input_layernorm.weight": "model-00031-of-00061.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00031-of-00061.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00030-of-00061.safetensors", + "model.layers.4.input_layernorm.weight": "model-00004-of-00061.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00004-of-00061.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.40.input_layernorm.weight": "model-00031-of-00061.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00031-of-00061.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.41.input_layernorm.weight": "model-00032-of-00061.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00032-of-00061.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00031-of-00061.safetensors", + "model.layers.42.input_layernorm.weight": "model-00033-of-00061.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00033-of-00061.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00032-of-00061.safetensors", + "model.layers.43.input_layernorm.weight": "model-00034-of-00061.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00034-of-00061.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00033-of-00061.safetensors", + "model.layers.44.input_layernorm.weight": "model-00034-of-00061.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00034-of-00061.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.45.input_layernorm.weight": "model-00035-of-00061.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00035-of-00061.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00034-of-00061.safetensors", + "model.layers.46.input_layernorm.weight": "model-00036-of-00061.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00036-of-00061.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00035-of-00061.safetensors", + "model.layers.47.input_layernorm.weight": "model-00037-of-00061.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00037-of-00061.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00036-of-00061.safetensors", + "model.layers.48.input_layernorm.weight": "model-00037-of-00061.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00037-of-00061.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.49.input_layernorm.weight": "model-00038-of-00061.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00038-of-00061.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00037-of-00061.safetensors", + "model.layers.5.input_layernorm.weight": "model-00005-of-00061.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00005-of-00061.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00004-of-00061.safetensors", + "model.layers.50.input_layernorm.weight": "model-00039-of-00061.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00039-of-00061.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00038-of-00061.safetensors", + "model.layers.51.input_layernorm.weight": "model-00040-of-00061.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00040-of-00061.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00039-of-00061.safetensors", + "model.layers.52.input_layernorm.weight": "model-00040-of-00061.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00040-of-00061.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.53.input_layernorm.weight": "model-00041-of-00061.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00041-of-00061.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00040-of-00061.safetensors", + "model.layers.54.input_layernorm.weight": "model-00042-of-00061.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00042-of-00061.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00041-of-00061.safetensors", + "model.layers.55.input_layernorm.weight": "model-00043-of-00061.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00043-of-00061.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00042-of-00061.safetensors", + "model.layers.56.input_layernorm.weight": "model-00043-of-00061.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00043-of-00061.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.57.input_layernorm.weight": "model-00044-of-00061.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00044-of-00061.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00043-of-00061.safetensors", + "model.layers.58.input_layernorm.weight": "model-00045-of-00061.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00045-of-00061.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00044-of-00061.safetensors", + "model.layers.59.input_layernorm.weight": "model-00046-of-00061.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00046-of-00061.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00045-of-00061.safetensors", + "model.layers.6.input_layernorm.weight": "model-00006-of-00061.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00006-of-00061.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00005-of-00061.safetensors", + "model.layers.60.input_layernorm.weight": "model-00046-of-00061.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00046-of-00061.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.61.input_layernorm.weight": "model-00047-of-00061.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00047-of-00061.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00046-of-00061.safetensors", + "model.layers.62.input_layernorm.weight": "model-00048-of-00061.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00048-of-00061.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00047-of-00061.safetensors", + "model.layers.63.input_layernorm.weight": "model-00049-of-00061.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00049-of-00061.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00048-of-00061.safetensors", + "model.layers.64.input_layernorm.weight": "model-00049-of-00061.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00049-of-00061.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.65.input_layernorm.weight": "model-00050-of-00061.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00050-of-00061.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00049-of-00061.safetensors", + "model.layers.66.input_layernorm.weight": "model-00051-of-00061.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00051-of-00061.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00050-of-00061.safetensors", + "model.layers.67.input_layernorm.weight": "model-00052-of-00061.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00052-of-00061.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00051-of-00061.safetensors", + "model.layers.68.input_layernorm.weight": "model-00052-of-00061.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00052-of-00061.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.69.input_layernorm.weight": "model-00053-of-00061.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00053-of-00061.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00052-of-00061.safetensors", + "model.layers.7.input_layernorm.weight": "model-00007-of-00061.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00007-of-00061.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00006-of-00061.safetensors", + "model.layers.70.input_layernorm.weight": "model-00054-of-00061.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00054-of-00061.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00053-of-00061.safetensors", + "model.layers.71.input_layernorm.weight": "model-00055-of-00061.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00055-of-00061.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00054-of-00061.safetensors", + "model.layers.72.input_layernorm.weight": "model-00055-of-00061.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00055-of-00061.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.73.input_layernorm.weight": "model-00056-of-00061.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00056-of-00061.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00055-of-00061.safetensors", + "model.layers.74.input_layernorm.weight": "model-00057-of-00061.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00057-of-00061.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00056-of-00061.safetensors", + "model.layers.75.input_layernorm.weight": "model-00058-of-00061.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00058-of-00061.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00057-of-00061.safetensors", + "model.layers.76.input_layernorm.weight": "model-00058-of-00061.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00058-of-00061.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.77.input_layernorm.weight": "model-00059-of-00061.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00059-of-00061.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00058-of-00061.safetensors", + "model.layers.78.input_layernorm.weight": "model-00060-of-00061.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00060-of-00061.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00059-of-00061.safetensors", + "model.layers.79.input_layernorm.weight": "model-00061-of-00061.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00061-of-00061.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00061-of-00061.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00060-of-00061.safetensors", + "model.layers.8.input_layernorm.weight": "model-00007-of-00061.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00007-of-00061.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.9.input_layernorm.weight": "model-00008-of-00061.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00008-of-00061.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00008-of-00061.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00007-of-00061.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00007-of-00061.safetensors", + "model.norm.weight": "model-00061-of-00061.safetensors" + } +}