diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00001-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00001-of-00080.bin deleted file mode 100644 index 7d1308f4a59ae1267cc0e3e1bc9d9659fd8967cb..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00001-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a81737a16884845a644acc5d2448c7855eededb415bf84bbf8e9fa1a084a2ca3 -size 9993536928 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00002-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00002-of-00080.bin deleted file mode 100644 index 14be14c622c7afd7d15f717e518bf293ede94d2d..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00002-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9170a0bfac9df23a61537b855cc8773efba28b306cab7c4cf6832a9080301449 -size 9795604562 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00003-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00003-of-00080.bin deleted file mode 100644 index d7e9731cd951af042ffebc6835f4094d66b5b3e7..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00003-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9c65195277a0430fe5279edbe09daa3d6eded81348aa038340973947ca14d9a7 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00004-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00004-of-00080.bin deleted file mode 100644 index e8b777acd92ede746b59ebf1c2a6c2a54fa8ef2d..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00004-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:65d3267b296f4017f4fe1c37c63062623231ded1cac8e46ca6b1aca80b650e74 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00005-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00005-of-00080.bin deleted file mode 100644 index 8e2262695c292afa95df9b206847c4e717229460..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00005-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9c3b6c982c2dd2979027ec4773833bc8bae5e1b350114a0c947f8073a4ec12c5 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00006-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00006-of-00080.bin deleted file mode 100644 index a5a7dc7a59b41469d3b7c6ac6f7edb24c908a453..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00006-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9ed37b054c5d710f7a900e4c55b27ffd6a7601be1ed7f1d6885b7bc145106684 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00007-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00007-of-00080.bin deleted file mode 100644 index 41391a510b5cfc72319e7a8981508dd41436b214..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00007-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ca0471b22144beaf3b76e68f544eb9ceacd33b9cbfecbf7f060e2a95a6e32dd0 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00008-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00008-of-00080.bin deleted file mode 100644 index 4d02d0cd7c5a2ba48ddd0aad0af837a488563901..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00008-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0b4377452fbdf0d3b9093ae6162869f73820471314ffbd4b31d7982ee6730bd2 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00009-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00009-of-00080.bin deleted file mode 100644 index 921264138527f1af87a5c6130e6fa50e07c8289d..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00009-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3d8af5bad16cd4c87730840d5541e7019b5b08c5b91cd980a03fa3f6e35d3fa5 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00010-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00010-of-00080.bin deleted file mode 100644 index 6a7bb28e93af5ad618ea5c1a6706ddd23de527ab..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00010-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99942b981d1abe3ccb2f893e0711549ed340bb0d1305bf996f30d001b4e6995e -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00011-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00011-of-00080.bin deleted file mode 100644 index c3e34956ab275aa1606e3ed82d4d59b7b033799f..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00011-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c44a73832630bd007367e7972d87325b3f5248084faf691904c1dd4117f1b897 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00012-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00012-of-00080.bin deleted file mode 100644 index 9fab107dffb43a791688a5f77d9ff58b38ffe8c3..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00012-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:45126b382b19376c89893a2c7c530d0fb5cecaec4872d805480640b20557f83c -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00013-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00013-of-00080.bin deleted file mode 100644 index a6dd89db0eedd17bee690c3254be3261c564dd72..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00013-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f5cc6c13dbcecc7f772204c2e220f5c25606b5d85e773859b63cf052e4b736a -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00014-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00014-of-00080.bin deleted file mode 100644 index b36b07650a30512ab5f3906a334bdaf2fe1efc9e..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00014-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79f9600b46b7f7e18c761ef08231296efae726afd09129fc927654c5a505e0f4 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00015-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00015-of-00080.bin deleted file mode 100644 index 66d1bd9283f99a68e15289af4e971ff05b3322e7..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00015-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:08f8612a53e5209a1523664386f7d42fa0656fae1cec1879e95602883efb8a3e -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00016-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00016-of-00080.bin deleted file mode 100644 index 27f78eb61e58c1b8a34b2dd7cd551c5c16cb6c22..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00016-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4899011871e897b2f8cd3a047720fb32067abbc12b554aaec6e5fb9c58029b45 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00017-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00017-of-00080.bin deleted file mode 100644 index 8d678c26e04a3e77bed339d9733b552960fc9618..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00017-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ab34508eac8c9551697aca52d2b8d5f5a4f0f7cf85ebb4d3330b12705f665054 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00018-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00018-of-00080.bin deleted file mode 100644 index b2e9e08cccc09d1413e5b595e1496e6569a573e9..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00018-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:98a9c9fa15edcfb9ae3d42c528a0ea09333892e3b0b70d1133c310229df72ed3 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00019-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00019-of-00080.bin deleted file mode 100644 index c3a6f67e84842ec4b6f28b439dd2c6bf9ada727c..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00019-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:de9773f8aa4cd0a4a27122ed6cf779e92daea1d1b6d5740b9c89535ec87050df -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00020-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00020-of-00080.bin deleted file mode 100644 index 08c45510a9c1d0157f1f014daac0eb2bdb378d46..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00020-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d37dcea06ff3af07b15782bd430f09c604ad46d96c90bd3ac997e5834ef93c02 -size 8540480521 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00021-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00021-of-00080.bin deleted file mode 100644 index b7a91fdb420a6830461468e7528f907280db7072..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00021-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e16762c05cf46c9a1ee0b6a9a70d1e3da809f56846c912cdfeb511749ae66e42 -size 9994004482 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00022-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00022-of-00080.bin deleted file mode 100644 index 2b7d25091c9807ddcb03b970da22324002d94810..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00022-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2af3bd37351bc6df6ed1c3afeaeef37f8b368c7024525d3cd11ca88038a96751 -size 9783670222 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00023-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00023-of-00080.bin deleted file mode 100644 index a977862eb1f2a9dbc9763a9f527de875cd4eab37..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00023-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3ca96ec902f7e3d5dc772c526744e92b6359d1478ee0416431e5b624da2cc560 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00024-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00024-of-00080.bin deleted file mode 100644 index 6e055bdd878ab6482bc05d664c4f5926d7224a02..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00024-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2ceb1603d0047ca3740a81c6881f9ebd8fb55c435654402d3c8167f90d7a5d73 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00025-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00025-of-00080.bin deleted file mode 100644 index 4b26d6de9ef03cb32aa6b5226207b139fbb524e3..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00025-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:94a7581ac79e1eac60a5d35e46416c8e294c8b838f4a191d2bf5947f4264809a -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00026-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00026-of-00080.bin deleted file mode 100644 index 45ec661d1c9e282f9f5253752ecd4c7edbede032..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00026-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e447fe49646c36f59df9ae3a95484d10bfee4daed97032993e62b973f79394a6 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00027-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00027-of-00080.bin deleted file mode 100644 index d300ab6354575d0fa2e3d4ee39425c8fd5fda722..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00027-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:82214b393e41872595cf5212b9197a9acf657e71fb1bbda5b8ace668b2509d5d -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00028-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00028-of-00080.bin deleted file mode 100644 index 8e472c63c11cd0083ef360cbefa79c9c28096806..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00028-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:af16a3287e072a6fca4f6561c991382e8242468b462fcef43327e02c99dded10 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00029-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00029-of-00080.bin deleted file mode 100644 index 03785c9906c8d9d0a801722281992934fcc27860..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00029-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aee479017687a23b19e6736d7460d2d3f7f12e43a2edd4b2348a50f66e0856dd -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00030-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00030-of-00080.bin deleted file mode 100644 index 64b48cce30787b9db7435a63dc26d3a7e84ec797..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00030-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5689239c7419cfee57635c54f01ec309287bd9f68deacd151b283762c7d7ab47 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00031-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00031-of-00080.bin deleted file mode 100644 index 1717a94f3bbbc84f4cc58345ff25557382a7c05b..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00031-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4297cf8ff49f54eb110159e6ecee8dca5eaae3859972013389d91184515c714d -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00032-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00032-of-00080.bin deleted file mode 100644 index 452a3b1e0e57d48188f254b90d1fee89350d77cf..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00032-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:738ffad7bc9b3285d38c00c873cdd5ec38930da967dfa01cf4aa755e548b5304 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00033-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00033-of-00080.bin deleted file mode 100644 index cae33824aab5b789a3a98f30a235a9586dc5bcac..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00033-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c1fc283196616c6c0f960ca1e64f0550355c82e46a366081a7632f4b455d83cd -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00034-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00034-of-00080.bin deleted file mode 100644 index 228ee31e0bbf99b70556d7f0c05c651a9034afb6..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00034-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c83dc33b20a975e3ce8c7b1d68c6127b58d2ee036e65135afd14631accfacb07 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00035-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00035-of-00080.bin deleted file mode 100644 index 0c7731958d5fee44547478aa0dd73c6c33b06c29..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00035-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:835446a89dd2f3435387fe9bf0e19209683d006e1e4c86c1cc7ff914a95b7049 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00036-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00036-of-00080.bin deleted file mode 100644 index e7529ae63527afc23feb3037b1c232e3b23a4411..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00036-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc82443773eaeeccd0ac6faaa76ae8792473cffcd2bec4bd62d829e86749dc6d -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00037-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00037-of-00080.bin deleted file mode 100644 index de74d0df0a0006df14eab7fc0b60199ed6c675ec..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00037-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a73b04e4ee5d2712162966c32d23c2f289d6d9c26c7ae04c06e16241625565f5 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00038-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00038-of-00080.bin deleted file mode 100644 index 10fa83f0afdef23c26079039c7bd192d3a7c9fa7..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00038-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d67e256e8fd1794ef7fabba906781c3fcad5a9de20cdc62089c9ac3d74407380 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00039-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00039-of-00080.bin deleted file mode 100644 index 30f3576a9afd01185a0d9a0a387feaed7e3e72a7..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00039-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:848420a003e755b4fd41b011b8dfa0299907c2c129dcae6c608135fd9a8ae86c -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00040-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00040-of-00080.bin deleted file mode 100644 index 2d5dd9e71814345d2e1f700605a1a11bb374ab8f..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00040-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c2ecf55380d549485b80ca411cf26184b5041d00937a1b822bab4876b4d519a -size 6900439728 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00041-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00041-of-00080.bin deleted file mode 100644 index 1709c66acbb332279781dfc01b288fafd8d26e9c..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00041-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:379aff74dc4ee8357ddbf620cb8d0ac95eb577ab95e0ebee2727be651ba04ad7 -size 9994004482 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00042-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00042-of-00080.bin deleted file mode 100644 index be00e47e1b9b6cb261027a9d2d4a7cf13fcfd221..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00042-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e7862a8af4401ac7fbac9e967d6aed83ff1be6a8f16903be4b4a6ecf1bfc581f -size 9783670222 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00043-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00043-of-00080.bin deleted file mode 100644 index d69b67a039a84751eccbb1a97340eb2eede6c0c5..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00043-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b74db285e0dd90d93206c7b9b74a77499dd7b88a5430f1f6633d4af2a82c947a -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00044-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00044-of-00080.bin deleted file mode 100644 index 6233db9d88544d7103f25e83f7dbb5f2cd3750c1..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00044-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d1d65aae53d946b14d4bec787430c217ad44b0fed5af02cc9c9b53a264a362df -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00045-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00045-of-00080.bin deleted file mode 100644 index ef61ee692eec7d46e888bbc18e9473d5bcdd3a8e..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00045-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eaee1f4d3dc5960c4158faf76bd2dc25bca7dae62b7a17ad3ca3c1a361b724a1 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00046-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00046-of-00080.bin deleted file mode 100644 index e324afabb3ea6b2530747953a6dd08e3703915cb..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00046-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a8ef667b5573515d900b9d1961b3aaa364f8cefd266794ec9030480e0bddd38b -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00047-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00047-of-00080.bin deleted file mode 100644 index 1694634edd09eea0901db097694bc50d1853097c..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00047-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1a86cc1336a53270314a39e2ef3c576098f1cb9401493f796086f4d2d5c30c4e -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00048-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00048-of-00080.bin deleted file mode 100644 index a55631af736cc904238a179d6e6a3dcab9b3f242..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00048-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:985a5ee1859c38a18d6ec2538698884b3f0e3821c5d6ffeb2f65c7f4ca40e4c5 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00049-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00049-of-00080.bin deleted file mode 100644 index af8be579d4ae9c56adbdf40cccddb3db3933007b..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00049-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a7528c468ca7f4d3387de86887a203a1e8ef3904241ddd6b9488caf4bca9f764 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00050-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00050-of-00080.bin deleted file mode 100644 index 21d65bae203efd3971733f116652bc7710447d79..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00050-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b1eda1b774e66fa837be795bc0162f012a52f79e96ab677fbdc32a944e352785 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00051-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00051-of-00080.bin deleted file mode 100644 index 0d152279f91d30af3e646192d467b0c909fa6203..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00051-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c47aedc9451d345c172564df5b4dd0bfcb872043af4af54ac91738f28aa2d9ed -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00052-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00052-of-00080.bin deleted file mode 100644 index 1341b9b69657a9f6b98e93be90ddce209f8b7d9d..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00052-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d2dfd29de86da63b5dad601fcb31b6c6c550d451b7fcd3ba602380cfacc2392 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00053-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00053-of-00080.bin deleted file mode 100644 index ce854f43186d8cceb6f62b2db0f62fd92d174de6..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00053-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:237c977bedd66d1ed1c4cd0736eae52c8e2d2347e99951e7239ac8b0a0dde572 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00054-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00054-of-00080.bin deleted file mode 100644 index 01f9f57fed71cd2ed441e8dd31434d684587a397..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00054-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e7a6c4604585e93953779136a7731c7a458b9e750c59054d42075ed946110ce1 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00055-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00055-of-00080.bin deleted file mode 100644 index a67c1dd06f74c7a9824981bb82dfa0501f00c9dd..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00055-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:60bf3e0ec9b46dd4ebcdf11567c4339dd75b0904723669018731ea75322a5006 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00056-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00056-of-00080.bin deleted file mode 100644 index f1db20e6b653293451c157205548e6ad870bc69d..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00056-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f1edbe8f2d6c478d034f5013ec45113b87dbb5a64670ea1bcd6fe1371ce4116a -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00057-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00057-of-00080.bin deleted file mode 100644 index 633fcdf2b9888c471792350358c30cee73255b3c..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00057-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c67bd6a4b62a8e47d586c6c5e95ebb9bcf821ac96feadce4bf59e69a09e60e83 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00058-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00058-of-00080.bin deleted file mode 100644 index 636cc5ee9a6c34cfeed585c7a38a9f3fd93c3f2f..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00058-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a81af3dca46d4dca1f281b21b73f80cfd811e8d67fb4352fe824c13a4df44be6 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00059-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00059-of-00080.bin deleted file mode 100644 index 5ee5582d222c20f48864bf5d36ff3d26464458f3..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00059-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:61fcb482bd71c5ca90a1fd91e40532bfb1dae9203029401961502f622a1e5ac4 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00060-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00060-of-00080.bin deleted file mode 100644 index 4c91063810bfc1c5b1959bb83d347f50deb40209..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00060-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:36874b741aa053b51ba40aa28f3ed950910063dad6e6b304ede3ee3d034852d5 -size 6900439728 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00061-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00061-of-00080.bin deleted file mode 100644 index 6b02a1172da4e86bdfa57f03cd340dc90fd17550..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00061-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b381ad05fbc9f0cd35d5f983f488c3a364e9f8f357c6bb9c9d819eb1f86536e6 -size 9994004482 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00062-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00062-of-00080.bin deleted file mode 100644 index 434b1af9533b1f017592b28f5db05b7b80442423..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00062-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2eef5019f2ae9107942a981ee7cd2184697898175e1828c0cdfdeeca2a94e309 -size 9795149979 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00063-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00063-of-00080.bin deleted file mode 100644 index 194b19469142612b2303d41128e6b53a03f3e22c..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00063-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4f8e1d46e5d88a8f990e0230fad05f57bc83c379947ae7a15115148b7fa66946 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00064-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00064-of-00080.bin deleted file mode 100644 index 87badb56c2279966c1a22f9dd163fc0e9edc72fd..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00064-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a95071602f83e3dbb9a213e53ae8402d71adac15996ca0f766eb89c960e5ed5e -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00065-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00065-of-00080.bin deleted file mode 100644 index b0b44840740b55251cf4b90bf218ea66640b2acd..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00065-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:623ace5d00ad0dace0eea831ce76b4c0ae718e9ae4c088d0b60dc5ca41efecd5 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00066-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00066-of-00080.bin deleted file mode 100644 index 2677d74e9afc4e169d1ea7d5689304e109db0829..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00066-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:80723b88cf97ebcaa3dfac548fc2cc1c4f7d756f0d0801a8fd8bd725e6d8026a -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00067-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00067-of-00080.bin deleted file mode 100644 index 9c651722a1e09554a2950de68aef47d2eea7c91a..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00067-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:50a1154cfe97b3bce623ee748ae736f15f377fde975f6e836466a7fd0133ae37 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00068-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00068-of-00080.bin deleted file mode 100644 index 10483c40096071fda30e07352ed1dc998ed4725d..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00068-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eb9c188afea9372d5811a5957e6b4cb56ab7824638c080994a620bf7519f6dc2 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00069-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00069-of-00080.bin deleted file mode 100644 index fe6e602bd3a91b3f7ae7f2c413777d50530ae878..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00069-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2430ce37dfb395d3acb0d602e0e127d2d03bea8223226605188192bd3302bff9 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00070-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00070-of-00080.bin deleted file mode 100644 index 62a4b1afe820fd2933a4577c22b7608abb8f0021..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00070-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11e6cbf631ef9383a927fe1badd67abb43516cfc24fe6b1a5937a3af77e9d0a2 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00071-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00071-of-00080.bin deleted file mode 100644 index ee785dbb99cb18a7087ee454c30932e806d3a907..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00071-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:59afe1ec76db261f23b1c3b2824639a97e87f6734bc543498ba8017136976675 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00072-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00072-of-00080.bin deleted file mode 100644 index 533dbe4fb78663f039a0b5bdff8a02ecbc1d06df..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00072-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4f34607a9f94bb5b8db7d0f72baf2d05a26cabc3ffa5a486fc8bfb4349ccf28 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00073-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00073-of-00080.bin deleted file mode 100644 index 900b477e289a0d5e068521ed548769ebacb4f5ac..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00073-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:79aa829a41eef77682d44b71827f5624b9b7f8884b8aa982fdfadb183e019ad7 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00074-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00074-of-00080.bin deleted file mode 100644 index 85334c79d2bf7590957b84bcbc46b6b1d26191d9..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00074-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cabc6bc312d4c367c7f8bc707c5b62ca32886ad73753dacbbe4fb0f61823cc19 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00075-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00075-of-00080.bin deleted file mode 100644 index f9a939d61f2b8e3eda6584f9148c205dff1370b1..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00075-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6e0151ea47d63476d00f21bcafb2e389db5b2c960cf42a3467f43192e83abab6 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00076-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00076-of-00080.bin deleted file mode 100644 index 3c50e99dc95f67cc0db07fdfa07d4b000cb5caf6..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00076-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:05a987fc43264efafcad344b71d2e8adef2e929a569df69ece820d3a90f47760 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00077-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00077-of-00080.bin deleted file mode 100644 index 89d1b7037abaa7eebb32d0bd800bfec4fb05ad4f..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00077-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b68805e94edaa9329c9acbd207f21051b6f1445730dca923a9c0da6769ff4eb7 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00078-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00078-of-00080.bin deleted file mode 100644 index 4f7d8cf0c01bc3b8e0ee557c6f823f78f4923cae..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00078-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d645296b39d26c27b09b592fb2268ec2c3bb8e6cb9252023c7a02ee0ad5b854a -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00079-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00079-of-00080.bin deleted file mode 100644 index e8f6b25b137c1d6ddcaf74cd9efac380f5a7559f..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00079-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:46db90156bd69c1b19cf93ae576b28d7b907aa4cd32dc802e29e95e47fdfa8f2 -size 9840245586 diff --git a/Hunyuan-A52B-Instruct/pytorch_model-00080-of-00080.bin b/Hunyuan-A52B-Instruct/pytorch_model-00080-of-00080.bin deleted file mode 100644 index 71a6e044ed334c08dc57cbed9af22b96df2ca80d..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model-00080-of-00080.bin +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e6118459aa5e6e94223ec4ca9bb6e031cdc6cc2a84c3b859d2d3fcc60b58a622 -size 8540480649 diff --git a/Hunyuan-A52B-Instruct/pytorch_model.bin.index.json b/Hunyuan-A52B-Instruct/pytorch_model.bin.index.json deleted file mode 100644 index a5b1da4105b727ef71b32d6aa2dcce92d1e9496d..0000000000000000000000000000000000000000 --- a/Hunyuan-A52B-Instruct/pytorch_model.bin.index.json +++ /dev/null @@ -1,7241 +0,0 @@ -{ - "metadata": { - "total_size": 779149623808 - }, - "weight_map": { - "model.embed_tokens.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.0.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.0.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.0.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.0.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.1.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.1.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.1.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.10.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.10.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.10.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.11.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.11.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.11.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.12.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.12.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.12.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.13.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.13.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.13.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.14.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.14.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.14.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.15.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.15.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.15.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.2.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.2.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.2.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.3.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.3.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.3.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.4.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.4.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.4.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.5.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.5.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.5.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.6.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.6.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.6.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.7.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.7.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.7.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.8.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.8.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.8.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.9.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.9.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.mlp.experts.9.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.0.mlp.gate.wg.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.0.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.shared_mlp.down_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.0.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.0.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.0.mlp.shared_mlp.up_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.0.self_attn.k_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.0.self_attn.key_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.0.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.0.self_attn.q_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.0.self_attn.query_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.0.self_attn.v_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.1.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.0.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.0.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.0.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.1.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.1.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.1.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.10.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.10.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.10.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.11.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.11.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.11.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.12.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.12.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.12.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.13.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.13.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.13.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.14.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.14.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.14.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.15.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.15.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.15.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.2.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.2.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.2.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.3.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.3.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.3.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.4.down_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.4.gate_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.4.up_proj.weight": "pytorch_model-00003-of-00080.bin", - "model.layers.1.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.5.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.5.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.5.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.6.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.6.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.6.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.7.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.7.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.7.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.8.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.8.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.8.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.9.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.9.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.1.mlp.experts.9.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.1.mlp.gate.wg.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.1.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.1.mlp.shared_mlp.down_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.1.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.1.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.1.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.1.mlp.shared_mlp.up_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.1.self_attn.key_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.1.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.1.self_attn.q_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.1.self_attn.query_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.10.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.10.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.0.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.0.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.0.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.1.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.1.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.1.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.10.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.10.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.10.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.11.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.11.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.11.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.12.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.12.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.12.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.13.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.13.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.13.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.14.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.14.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.14.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.15.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.15.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.15.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.2.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.2.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.2.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.3.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.3.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.3.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.4.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.4.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.4.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.5.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.5.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.5.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.10.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.6.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.6.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.6.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.7.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.7.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.7.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.8.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.8.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.8.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.9.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.9.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.mlp.experts.9.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.10.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.10.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.10.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.10.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.10.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.10.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.10.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.10.self_attn.k_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.10.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.10.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.10.self_attn.q_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.10.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.10.self_attn.v_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.11.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.11.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.0.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.11.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.0.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.11.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.0.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.11.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.1.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.11.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.1.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.11.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.1.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.11.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.10.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.10.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.10.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.11.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.11.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.11.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.12.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.12.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.12.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.13.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.13.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.13.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.14.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.14.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.14.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.15.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.15.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.15.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.2.down_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.11.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.2.gate_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.11.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.2.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.11.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.3.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.3.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.3.up_proj.weight": "pytorch_model-00025-of-00080.bin", - "model.layers.11.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.4.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.4.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.4.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.5.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.5.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.5.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.6.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.6.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.6.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.7.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.7.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.7.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.8.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.8.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.8.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.9.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.9.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.11.mlp.experts.9.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.11.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.11.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.11.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.11.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.11.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.11.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.11.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.11.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.11.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.11.self_attn.q_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.11.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.12.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.12.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.0.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.0.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.0.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.1.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.1.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.1.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.10.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.10.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.10.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.11.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.11.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.11.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.12.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.12.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.12.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.13.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.13.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.13.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.14.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.14.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.14.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.15.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.15.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.15.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.2.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.2.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.2.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.3.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.3.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.3.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.4.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.4.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.4.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.5.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.5.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.5.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.12.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.6.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.6.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.6.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.7.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.7.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.7.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.8.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.8.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.8.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.9.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.9.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.mlp.experts.9.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.12.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.12.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.12.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.12.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.12.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.12.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.12.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.12.self_attn.k_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.12.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.12.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.12.self_attn.q_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.12.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.12.self_attn.v_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.13.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.13.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.0.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.13.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.0.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.13.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.0.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.13.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.1.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.13.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.1.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.13.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.1.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.13.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.10.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.10.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.10.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.11.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.11.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.11.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.12.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.12.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.12.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.13.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.13.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.13.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.14.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.14.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.14.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.15.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.15.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.15.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.2.down_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.13.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.2.gate_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.13.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.2.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.13.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.3.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.3.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.3.up_proj.weight": "pytorch_model-00045-of-00080.bin", - "model.layers.13.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.4.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.4.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.4.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.5.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.5.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.5.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.6.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.6.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.6.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.7.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.7.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.7.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.8.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.8.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.8.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.9.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.9.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.13.mlp.experts.9.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.13.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.13.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.13.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.13.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.13.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.13.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.13.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.13.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.13.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.13.self_attn.q_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.13.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.14.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.14.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.0.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.14.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.0.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.14.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.0.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.14.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.1.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.14.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.1.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.14.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.1.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.14.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.10.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.10.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.10.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.11.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.11.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.11.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.12.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.12.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.12.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.13.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.13.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.13.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.14.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.14.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.14.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.15.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.15.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.15.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.2.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.14.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.2.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.14.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.2.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.14.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.3.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.3.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.3.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.4.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.4.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.4.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.5.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.5.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.5.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.6.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.6.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.6.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.7.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.7.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.7.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.8.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.8.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.8.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.9.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.9.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.mlp.experts.9.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.14.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.14.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.14.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.14.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.14.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.14.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.14.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.14.self_attn.k_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.14.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.14.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.14.self_attn.q_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.14.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.14.self_attn.v_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.15.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.15.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.0.down_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.15.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.0.gate_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.15.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.0.up_proj.weight": "pytorch_model-00065-of-00080.bin", - "model.layers.15.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.1.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.1.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.1.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.10.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.10.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.10.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.11.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.11.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.11.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.12.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.12.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.12.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.13.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.13.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.13.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.14.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.14.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.14.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.15.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.15.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.15.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.15.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.15.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.15.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.2.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.2.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.2.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.3.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.3.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.3.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.4.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.4.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.4.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.5.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.5.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.5.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.6.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.6.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.6.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.7.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.7.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.7.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.8.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.8.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.8.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.9.down_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.9.gate_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.15.mlp.experts.9.up_proj.weight": "pytorch_model-00066-of-00080.bin", - "model.layers.15.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.15.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.15.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.15.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.15.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.15.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.15.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.15.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.15.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.15.self_attn.q_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.15.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.16.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.0.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.0.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.0.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.1.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.1.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.1.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.10.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.10.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.10.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.11.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.11.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.11.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.12.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.12.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.12.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.13.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.16.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.13.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.16.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.13.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.16.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.14.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.16.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.14.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.16.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.14.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.16.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.15.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.16.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.15.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.16.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.15.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.16.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.2.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.2.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.2.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.3.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.3.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.3.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.4.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.4.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.4.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.5.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.5.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.5.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.6.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.6.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.6.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.7.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.7.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.7.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.8.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.8.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.8.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.9.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.9.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.mlp.experts.9.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.16.mlp.gate.wg.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.16.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.16.mlp.shared_mlp.down_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.16.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.16.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.16.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.16.mlp.shared_mlp.up_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.16.self_attn.k_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.16.self_attn.key_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.16.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.16.self_attn.q_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.16.self_attn.query_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.16.self_attn.v_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.17.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.0.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.0.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.0.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.1.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.1.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.1.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.10.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.10.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.10.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.11.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.11.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.11.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.12.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.12.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.12.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.13.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.13.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.13.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.14.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.14.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.14.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.15.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.15.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.15.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.17.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.2.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.2.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.2.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.3.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.3.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.3.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.4.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.4.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.4.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.5.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.5.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.5.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.6.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.6.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.6.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.7.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.7.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.7.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.8.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.8.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.8.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.9.down_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.9.gate_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.17.mlp.experts.9.up_proj.weight": "pytorch_model-00008-of-00080.bin", - "model.layers.17.mlp.gate.wg.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.17.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.17.mlp.shared_mlp.down_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.17.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.17.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.17.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.17.mlp.shared_mlp.up_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.17.self_attn.key_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.17.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.17.self_attn.q_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.17.self_attn.query_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.18.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.18.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.0.down_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.18.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.0.gate_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.18.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.0.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.18.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.1.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.1.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.1.up_proj.weight": "pytorch_model-00026-of-00080.bin", - "model.layers.18.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.10.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.10.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.10.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.11.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.11.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.11.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.12.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.12.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.12.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.13.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.13.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.13.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.14.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.14.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.14.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.15.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.18.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.15.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.18.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.15.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.2.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.2.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.2.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.3.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.3.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.3.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.4.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.4.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.4.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.5.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.5.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.5.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.6.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.6.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.6.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.7.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.7.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.7.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.8.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.8.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.8.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.9.down_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.9.gate_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.mlp.experts.9.up_proj.weight": "pytorch_model-00027-of-00080.bin", - "model.layers.18.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.18.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.18.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.18.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.18.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.18.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.18.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.18.self_attn.k_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.18.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.18.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.18.self_attn.q_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.18.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.18.self_attn.v_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.19.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.19.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.0.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.0.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.0.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.1.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.1.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.1.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.10.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.10.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.10.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.11.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.11.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.11.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.12.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.12.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.12.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.13.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.19.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.13.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.19.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.13.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.14.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.19.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.14.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.19.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.14.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.19.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.15.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.19.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.15.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.19.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.15.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.19.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.2.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.2.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.2.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.3.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.3.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.3.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.4.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.4.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.4.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.5.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.5.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.5.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.6.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.6.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.6.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.7.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.7.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.7.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.8.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.8.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.8.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.9.down_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.9.gate_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.19.mlp.experts.9.up_proj.weight": "pytorch_model-00028-of-00080.bin", - "model.layers.19.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.19.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.19.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.19.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.19.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.19.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.19.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.19.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.19.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.19.self_attn.q_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.19.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.2.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.2.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.0.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.0.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.0.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.1.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.1.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.1.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.10.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.10.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.10.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.11.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.11.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.11.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.12.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.12.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.12.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.13.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.13.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.13.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.14.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.14.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.14.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.15.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.15.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.15.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.2.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.2.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.2.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.3.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.3.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.3.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.4.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.4.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.4.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.5.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.5.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.5.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.6.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.6.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.6.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.7.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.7.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.7.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.8.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.8.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.8.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.9.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.9.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.2.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.mlp.experts.9.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.2.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.2.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.2.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.2.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.2.self_attn.k_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.2.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.2.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.2.self_attn.q_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.2.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.2.self_attn.v_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.20.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.20.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.0.down_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.20.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.0.gate_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.20.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.0.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.20.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.1.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.1.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.1.up_proj.weight": "pytorch_model-00046-of-00080.bin", - "model.layers.20.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.10.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.10.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.10.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.11.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.11.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.11.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.12.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.12.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.12.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.13.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.13.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.13.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.14.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.14.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.14.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.15.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.20.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.15.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.20.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.15.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.2.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.2.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.2.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.3.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.3.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.3.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.4.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.4.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.4.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.5.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.5.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.5.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.6.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.6.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.6.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.7.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.7.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.7.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.8.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.8.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.8.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.9.down_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.9.gate_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.mlp.experts.9.up_proj.weight": "pytorch_model-00047-of-00080.bin", - "model.layers.20.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.20.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.20.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.20.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.20.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.20.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.20.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.20.self_attn.k_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.20.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.20.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.20.self_attn.q_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.20.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.20.self_attn.v_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.21.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.21.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.0.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.0.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.0.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.1.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.1.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.1.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.10.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.10.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.10.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.11.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.11.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.11.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.12.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.12.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.12.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.13.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.21.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.13.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.21.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.13.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.14.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.21.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.14.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.21.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.14.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.21.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.15.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.21.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.15.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.21.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.15.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.21.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.2.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.2.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.2.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.3.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.3.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.3.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.4.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.4.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.4.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.5.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.5.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.5.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.6.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.6.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.6.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.7.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.7.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.7.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.8.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.8.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.8.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.9.down_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.9.gate_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.21.mlp.experts.9.up_proj.weight": "pytorch_model-00048-of-00080.bin", - "model.layers.21.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.21.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.21.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.21.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.21.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.21.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.21.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.21.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.21.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.21.self_attn.q_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.21.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.22.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.22.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.0.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.0.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.0.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.1.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.1.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.1.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.10.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.10.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.10.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.11.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.11.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.11.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.12.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.12.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.12.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.13.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.22.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.13.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.22.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.13.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.22.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.14.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.22.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.14.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.22.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.14.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.22.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.15.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.22.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.15.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.22.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.15.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.22.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.2.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.2.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.2.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.3.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.3.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.3.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.4.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.4.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.4.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.5.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.5.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.5.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.6.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.6.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.6.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.7.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.7.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.7.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.8.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.8.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.8.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.9.down_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.9.gate_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.mlp.experts.9.up_proj.weight": "pytorch_model-00067-of-00080.bin", - "model.layers.22.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.22.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.22.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.22.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.22.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.22.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.22.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.22.self_attn.k_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.22.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.22.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.22.self_attn.q_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.22.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.22.self_attn.v_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.23.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.23.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.0.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.0.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.0.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.1.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.1.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.1.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.10.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.10.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.10.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.11.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.11.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.11.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.12.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.12.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.12.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.13.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.13.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.13.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.14.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.14.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.14.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.15.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.15.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.15.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.23.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.2.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.2.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.2.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.3.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.3.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.3.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.4.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.4.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.4.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.5.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.5.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.5.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.6.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.6.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.6.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.7.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.7.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.7.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.8.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.8.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.8.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.9.down_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.9.gate_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.23.mlp.experts.9.up_proj.weight": "pytorch_model-00068-of-00080.bin", - "model.layers.23.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.23.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.23.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.23.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.23.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.23.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.23.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.23.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.23.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.23.self_attn.q_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.23.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.24.input_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.24.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.0.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.0.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.0.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.1.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.1.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.1.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.10.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.10.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.10.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.11.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.11.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.11.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.12.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.12.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.12.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.13.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.13.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.13.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.14.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.14.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.14.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.15.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.15.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.15.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.2.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.2.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.2.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.3.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.3.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.3.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.4.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.4.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.4.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.5.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.5.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.5.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.6.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.6.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.6.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.7.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.7.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.7.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.8.down_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.8.gate_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.8.up_proj.weight": "pytorch_model-00009-of-00080.bin", - "model.layers.24.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.9.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.9.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.mlp.experts.9.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.24.mlp.gate.wg.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.24.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.24.mlp.shared_mlp.down_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.24.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.24.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.24.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.24.mlp.shared_mlp.up_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.24.self_attn.k_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.24.self_attn.key_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.24.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.24.self_attn.q_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.24.self_attn.query_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.24.self_attn.v_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.25.input_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.25.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.0.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.0.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.0.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.1.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.1.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.1.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.10.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.10.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.10.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.11.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.11.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.11.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.12.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.12.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.12.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.13.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.13.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.13.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.14.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.14.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.14.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.15.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.15.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.15.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.2.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.2.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.2.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.3.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.3.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.3.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.4.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.4.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.4.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.5.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.5.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.5.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.6.down_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.6.gate_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.6.up_proj.weight": "pytorch_model-00010-of-00080.bin", - "model.layers.25.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.7.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.7.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.7.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.8.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.8.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.8.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.9.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.9.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.25.mlp.experts.9.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.25.mlp.gate.wg.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.25.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.25.mlp.shared_mlp.down_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.25.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.25.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.25.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.25.mlp.shared_mlp.up_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.25.self_attn.key_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.25.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.25.self_attn.q_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.25.self_attn.query_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.26.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.26.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.0.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.0.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.0.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.1.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.1.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.1.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.10.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.10.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.10.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.11.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.11.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.11.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.12.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.12.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.12.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.13.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.13.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.13.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.14.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.14.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.14.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.15.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.15.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.15.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.26.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.2.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.2.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.2.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.3.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.3.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.3.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.4.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.4.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.4.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.5.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.5.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.5.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.6.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.6.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.6.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.7.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.7.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.7.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.8.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.8.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.8.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.9.down_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.9.gate_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.mlp.experts.9.up_proj.weight": "pytorch_model-00029-of-00080.bin", - "model.layers.26.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.26.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.26.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.26.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.26.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.26.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.26.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.26.self_attn.k_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.26.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.26.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.26.self_attn.q_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.26.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.26.self_attn.v_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.27.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.27.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.0.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.0.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.0.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.1.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.1.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.1.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.10.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.10.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.10.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.11.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.11.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.11.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.12.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.12.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.12.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.13.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.13.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.13.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.14.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.14.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.14.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.15.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.15.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.15.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.2.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.2.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.2.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.3.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.3.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.3.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.4.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.4.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.4.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.5.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.5.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.5.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.6.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.6.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.6.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.7.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.7.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.7.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.8.down_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.8.gate_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.8.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.9.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.9.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.27.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.27.mlp.experts.9.up_proj.weight": "pytorch_model-00030-of-00080.bin", - "model.layers.27.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.27.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.27.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.27.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.27.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.27.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.27.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.27.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.27.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.27.self_attn.q_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.27.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.28.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.28.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.0.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.0.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.0.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.1.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.1.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.1.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.10.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.10.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.10.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.11.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.11.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.11.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.12.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.12.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.12.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.13.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.13.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.13.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.14.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.14.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.14.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.15.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.15.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.15.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.28.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.2.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.2.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.2.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.3.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.3.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.3.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.4.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.4.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.4.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.5.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.5.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.5.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.6.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.6.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.6.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.7.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.7.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.7.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.8.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.8.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.8.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.9.down_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.9.gate_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.mlp.experts.9.up_proj.weight": "pytorch_model-00049-of-00080.bin", - "model.layers.28.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.28.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.28.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.28.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.28.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.28.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.28.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.28.self_attn.k_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.28.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.28.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.28.self_attn.q_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.28.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.28.self_attn.v_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.29.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.29.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.0.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.0.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.0.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.1.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.1.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.1.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.10.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.10.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.10.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.11.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.11.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.11.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.12.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.12.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.12.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.13.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.13.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.13.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.14.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.14.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.14.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.15.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.15.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.15.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.2.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.2.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.2.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.3.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.3.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.3.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.4.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.4.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.4.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.5.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.5.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.5.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.6.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.6.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.6.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.7.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.7.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.7.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.8.down_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.8.gate_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.8.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.9.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.9.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.29.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.29.mlp.experts.9.up_proj.weight": "pytorch_model-00050-of-00080.bin", - "model.layers.29.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.29.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.29.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.29.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.29.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.29.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.29.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.29.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.29.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.29.self_attn.q_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.29.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.3.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.3.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.0.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.0.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.0.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.1.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.1.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.1.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.10.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.10.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.10.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.11.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.11.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.11.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.12.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.12.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.12.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.13.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.13.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.13.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.14.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.14.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.14.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.15.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.15.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.15.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.2.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.2.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.2.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.3.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.3.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.3.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.4.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.4.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.4.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.5.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.5.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.5.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.6.down_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.6.gate_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.6.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.7.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.7.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.7.up_proj.weight": "pytorch_model-00023-of-00080.bin", - "model.layers.3.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.8.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.8.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.8.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.9.down_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.9.gate_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.3.mlp.experts.9.up_proj.weight": "pytorch_model-00024-of-00080.bin", - "model.layers.3.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.3.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.3.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.3.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.3.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.3.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.3.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.3.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.3.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.3.self_attn.q_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.3.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.30.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.30.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.0.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.0.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.0.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.1.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.1.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.1.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.10.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.10.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.10.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.11.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.11.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.11.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.12.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.12.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.12.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.13.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.13.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.13.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.14.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.14.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.14.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.15.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.15.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.15.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.2.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.2.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.2.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.3.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.3.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.3.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.4.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.4.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.4.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.5.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.5.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.5.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.6.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.6.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.6.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.7.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.7.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.7.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.8.down_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.8.gate_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.8.up_proj.weight": "pytorch_model-00069-of-00080.bin", - "model.layers.30.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.9.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.9.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.mlp.experts.9.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.30.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.30.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.30.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.30.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.30.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.30.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.30.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.30.self_attn.k_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.30.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.30.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.30.self_attn.q_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.30.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.30.self_attn.v_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.31.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.31.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.0.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.0.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.0.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.1.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.1.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.1.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.10.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.10.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.10.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.11.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.11.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.11.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.12.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.12.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.12.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.13.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.13.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.13.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.14.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.14.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.14.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.15.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.15.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.15.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.2.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.2.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.2.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.3.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.3.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.3.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.4.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.4.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.4.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.5.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.5.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.5.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.6.down_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.6.gate_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.6.up_proj.weight": "pytorch_model-00070-of-00080.bin", - "model.layers.31.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.7.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.7.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.7.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.8.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.8.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.8.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.9.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.9.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.31.mlp.experts.9.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.31.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.31.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.31.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.31.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.31.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.31.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.31.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.31.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.31.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.31.self_attn.q_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.31.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.32.input_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.32.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.0.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.0.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.0.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.1.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.1.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.1.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.10.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.10.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.10.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.11.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.11.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.11.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.12.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.12.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.12.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.13.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.13.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.13.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.14.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.14.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.14.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.15.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.15.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.15.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.2.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.2.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.2.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.3.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.3.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.3.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.4.down_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.4.gate_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.4.up_proj.weight": "pytorch_model-00011-of-00080.bin", - "model.layers.32.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.5.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.5.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.5.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.6.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.6.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.6.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.7.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.7.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.7.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.8.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.8.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.8.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.9.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.9.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.mlp.experts.9.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.32.mlp.gate.wg.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.32.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.32.mlp.shared_mlp.down_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.32.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.32.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.32.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.32.mlp.shared_mlp.up_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.32.post_attention_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.32.self_attn.k_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.32.self_attn.key_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.32.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.32.self_attn.q_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.32.self_attn.query_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.32.self_attn.v_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.33.input_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.33.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.0.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.33.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.0.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.33.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.0.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.33.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.1.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.33.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.1.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.33.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.1.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.33.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.10.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.10.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.10.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.11.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.11.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.11.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.12.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.12.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.12.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.13.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.13.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.13.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.14.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.14.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.14.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.15.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.15.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.15.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.2.down_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.33.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.2.gate_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.33.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.2.up_proj.weight": "pytorch_model-00012-of-00080.bin", - "model.layers.33.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.3.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.3.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.3.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.4.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.4.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.4.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.5.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.5.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.5.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.6.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.6.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.6.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.7.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.7.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.7.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.8.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.8.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.8.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.9.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.9.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.33.mlp.experts.9.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.33.mlp.gate.wg.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.33.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.33.mlp.shared_mlp.down_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.33.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.33.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.33.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.33.mlp.shared_mlp.up_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.33.post_attention_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.33.self_attn.key_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.33.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.33.self_attn.q_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.33.self_attn.query_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.34.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.34.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.0.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.0.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.0.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.1.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.1.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.1.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.10.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.10.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.10.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.11.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.11.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.11.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.12.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.12.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.12.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.13.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.13.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.13.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.14.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.14.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.14.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.15.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.15.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.15.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.2.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.2.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.2.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.3.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.3.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.3.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.4.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.4.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.4.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.5.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.5.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.5.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.6.down_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.6.gate_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.6.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.7.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.7.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.7.up_proj.weight": "pytorch_model-00031-of-00080.bin", - "model.layers.34.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.8.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.8.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.8.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.9.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.9.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.mlp.experts.9.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.34.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.34.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.34.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.34.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.34.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.34.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.34.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.34.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.34.self_attn.k_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.34.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.34.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.34.self_attn.q_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.self_attn.q_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.34.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.34.self_attn.v_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.35.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.35.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.0.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.0.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.0.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.1.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.1.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.1.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.10.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.10.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.10.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.11.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.11.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.11.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.12.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.12.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.12.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.13.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.13.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.13.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.14.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.14.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.14.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.15.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.15.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.15.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.2.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.2.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.2.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.3.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.3.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.3.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.4.down_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.4.gate_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.4.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.5.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.5.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.5.up_proj.weight": "pytorch_model-00032-of-00080.bin", - "model.layers.35.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.6.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.6.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.6.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.7.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.7.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.7.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.8.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.8.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.8.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.9.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.9.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.35.mlp.experts.9.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.35.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.35.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.35.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.35.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.35.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.35.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.35.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.35.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.35.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.35.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.35.self_attn.q_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.35.self_attn.q_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.35.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.36.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.36.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.0.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.0.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.0.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.1.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.1.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.1.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.10.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.10.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.10.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.11.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.11.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.11.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.12.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.12.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.12.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.13.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.13.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.13.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.14.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.14.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.14.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.15.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.15.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.15.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.2.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.2.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.2.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.3.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.3.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.3.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.4.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.4.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.4.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.5.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.5.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.5.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.6.down_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.6.gate_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.6.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.7.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.7.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.7.up_proj.weight": "pytorch_model-00051-of-00080.bin", - "model.layers.36.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.8.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.8.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.8.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.9.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.9.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.mlp.experts.9.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.36.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.36.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.36.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.36.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.36.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.36.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.36.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.36.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.36.self_attn.k_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.self_attn.k_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.36.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.36.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.36.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.36.self_attn.q_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.self_attn.q_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.36.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.36.self_attn.v_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.36.self_attn.v_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.37.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.37.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.0.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.0.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.0.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.1.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.1.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.1.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.10.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.10.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.10.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.11.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.11.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.11.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.12.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.12.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.12.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.13.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.13.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.13.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.14.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.14.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.14.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.15.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.15.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.15.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.2.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.2.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.2.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.3.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.3.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.3.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.4.down_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.4.gate_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.4.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.5.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.5.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.5.up_proj.weight": "pytorch_model-00052-of-00080.bin", - "model.layers.37.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.6.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.6.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.6.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.7.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.7.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.7.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.8.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.8.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.8.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.9.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.9.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.37.mlp.experts.9.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.37.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.37.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.37.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.37.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.37.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.37.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.37.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.37.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.37.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.37.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.37.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.37.self_attn.q_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.37.self_attn.q_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.37.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.38.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.38.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.0.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.0.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.0.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.1.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.1.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.1.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.10.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.10.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.10.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.11.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.11.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.11.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.12.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.12.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.12.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.13.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.13.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.13.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.14.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.14.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.14.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.15.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.15.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.15.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.2.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.2.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.2.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.3.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.3.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.3.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.4.down_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.4.gate_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.4.up_proj.weight": "pytorch_model-00071-of-00080.bin", - "model.layers.38.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.5.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.5.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.5.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.6.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.6.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.6.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.7.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.7.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.7.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.8.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.8.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.8.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.9.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.9.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.mlp.experts.9.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.38.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.38.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.38.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.38.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.38.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.38.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.38.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.38.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.38.self_attn.k_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.self_attn.k_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.38.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.38.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.38.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.38.self_attn.q_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.self_attn.q_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.38.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.38.self_attn.v_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.38.self_attn.v_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.39.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.39.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.0.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.39.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.0.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.39.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.0.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.39.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.1.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.39.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.1.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.39.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.1.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.39.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.10.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.10.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.10.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.11.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.11.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.11.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.12.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.12.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.12.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.13.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.13.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.13.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.14.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.14.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.14.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.15.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.15.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.15.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.2.down_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.39.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.2.gate_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.39.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.2.up_proj.weight": "pytorch_model-00072-of-00080.bin", - "model.layers.39.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.3.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.3.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.3.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.4.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.4.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.4.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.5.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.5.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.5.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.6.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.6.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.6.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.7.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.7.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.7.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.8.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.8.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.8.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.9.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.9.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.39.mlp.experts.9.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.39.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.39.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.39.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.39.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.39.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.39.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.39.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.39.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.39.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.39.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.39.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.39.self_attn.q_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.39.self_attn.q_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.39.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.4.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.4.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.0.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.0.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.0.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.1.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.1.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.1.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.10.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.10.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.10.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.11.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.11.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.11.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.12.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.12.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.12.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.13.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.13.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.13.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.14.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.14.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.14.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.15.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.15.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.15.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.2.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.2.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.2.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.3.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.3.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.3.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.4.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.4.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.4.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.5.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.5.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.5.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.6.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.6.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.6.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.7.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.7.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.7.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.8.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.8.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.8.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.9.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.9.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.4.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.mlp.experts.9.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.4.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.4.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.4.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.4.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.4.self_attn.k_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.4.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.4.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.4.self_attn.q_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.4.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.4.self_attn.v_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.40.input_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.40.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.0.down_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.40.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.0.gate_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.40.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.0.up_proj.weight": "pytorch_model-00013-of-00080.bin", - "model.layers.40.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.1.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.1.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.1.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.10.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.10.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.10.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.11.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.11.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.11.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.12.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.12.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.12.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.13.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.13.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.13.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.14.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.14.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.14.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.15.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.40.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.15.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.40.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.15.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.40.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.2.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.2.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.2.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.3.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.3.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.3.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.4.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.4.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.4.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.5.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.5.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.5.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.6.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.6.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.6.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.7.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.7.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.7.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.8.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.8.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.8.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.9.down_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.9.gate_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.mlp.experts.9.up_proj.weight": "pytorch_model-00014-of-00080.bin", - "model.layers.40.mlp.gate.wg.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.40.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.40.mlp.shared_mlp.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.40.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.40.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.40.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.40.mlp.shared_mlp.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.40.post_attention_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.40.self_attn.k_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.self_attn.k_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.40.self_attn.key_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.40.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.40.self_attn.o_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.40.self_attn.q_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.self_attn.q_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.40.self_attn.query_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.40.self_attn.v_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.40.self_attn.v_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.41.input_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.41.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.0.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.0.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.0.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.1.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.1.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.1.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.10.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.10.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.10.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.11.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.11.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.11.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.12.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.12.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.12.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.13.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.41.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.13.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.41.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.13.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.41.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.14.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.41.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.14.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.41.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.14.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.41.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.15.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.41.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.15.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.41.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.15.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.41.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.2.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.2.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.2.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.3.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.3.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.3.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.4.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.4.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.4.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.5.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.5.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.5.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.6.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.6.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.6.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.7.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.7.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.7.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.8.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.8.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.8.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.9.down_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.9.gate_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.41.mlp.experts.9.up_proj.weight": "pytorch_model-00015-of-00080.bin", - "model.layers.41.mlp.gate.wg.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.41.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.41.mlp.shared_mlp.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.41.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.41.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.41.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.41.mlp.shared_mlp.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.41.post_attention_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.41.self_attn.key_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.41.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.41.self_attn.o_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.41.self_attn.q_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.41.self_attn.q_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.41.self_attn.query_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.42.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.42.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.0.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.42.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.0.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.42.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.0.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.42.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.1.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.42.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.1.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.42.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.1.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.42.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.10.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.10.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.10.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.11.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.11.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.11.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.12.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.12.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.12.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.13.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.13.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.13.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.14.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.14.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.14.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.15.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.15.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.15.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.2.down_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.42.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.2.gate_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.42.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.2.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.42.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.3.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.3.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.3.up_proj.weight": "pytorch_model-00033-of-00080.bin", - "model.layers.42.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.4.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.4.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.4.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.5.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.5.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.5.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.6.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.6.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.6.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.7.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.7.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.7.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.8.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.8.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.8.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.9.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.9.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.mlp.experts.9.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.42.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.42.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.42.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.42.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.42.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.42.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.42.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.42.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.42.self_attn.k_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.self_attn.k_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.42.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.42.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.42.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.42.self_attn.q_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.self_attn.q_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.42.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.42.self_attn.v_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.42.self_attn.v_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.43.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.43.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.0.down_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.43.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.0.gate_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.43.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.0.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.43.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.1.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.1.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.1.up_proj.weight": "pytorch_model-00034-of-00080.bin", - "model.layers.43.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.10.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.10.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.10.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.11.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.11.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.11.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.12.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.12.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.12.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.13.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.13.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.13.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.14.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.14.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.14.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.15.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.43.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.15.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.43.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.15.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.2.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.2.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.2.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.3.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.3.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.3.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.4.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.4.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.4.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.5.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.5.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.5.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.6.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.6.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.6.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.7.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.7.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.7.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.8.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.8.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.8.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.9.down_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.9.gate_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.43.mlp.experts.9.up_proj.weight": "pytorch_model-00035-of-00080.bin", - "model.layers.43.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.43.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.43.mlp.shared_mlp.down_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.43.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.43.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.43.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.43.mlp.shared_mlp.up_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.43.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.43.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.43.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.43.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.43.self_attn.q_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.43.self_attn.q_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.43.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.44.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.44.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.0.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.44.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.0.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.44.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.0.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.44.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.1.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.44.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.1.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.44.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.1.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.44.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.10.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.10.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.10.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.11.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.11.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.11.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.12.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.12.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.12.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.13.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.13.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.13.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.14.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.14.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.14.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.15.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.15.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.15.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.2.down_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.44.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.2.gate_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.44.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.2.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.44.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.3.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.3.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.3.up_proj.weight": "pytorch_model-00053-of-00080.bin", - "model.layers.44.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.4.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.4.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.4.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.5.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.5.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.5.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.6.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.6.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.6.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.7.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.7.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.7.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.8.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.8.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.8.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.9.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.9.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.mlp.experts.9.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.44.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.44.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.44.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.44.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.44.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.44.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.44.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.44.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.44.self_attn.k_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.self_attn.k_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.44.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.44.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.44.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.44.self_attn.q_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.self_attn.q_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.44.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.44.self_attn.v_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.44.self_attn.v_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.45.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.45.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.0.down_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.45.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.0.gate_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.45.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.0.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.45.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.1.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.1.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.1.up_proj.weight": "pytorch_model-00054-of-00080.bin", - "model.layers.45.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.10.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.10.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.10.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.11.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.11.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.11.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.12.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.12.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.12.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.13.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.13.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.13.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.14.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.14.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.14.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.15.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.45.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.15.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.45.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.15.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.2.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.2.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.2.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.3.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.3.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.3.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.4.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.4.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.4.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.5.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.5.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.5.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.6.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.6.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.6.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.7.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.7.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.7.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.8.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.8.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.8.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.9.down_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.9.gate_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.45.mlp.experts.9.up_proj.weight": "pytorch_model-00055-of-00080.bin", - "model.layers.45.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.45.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.45.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.45.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.45.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.45.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.45.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.45.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.45.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.45.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.45.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.45.self_attn.q_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.45.self_attn.q_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.45.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.46.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.46.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.0.down_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.46.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.0.gate_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.46.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.0.up_proj.weight": "pytorch_model-00073-of-00080.bin", - "model.layers.46.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.1.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.1.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.1.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.10.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.10.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.10.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.11.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.11.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.11.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.12.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.12.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.12.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.13.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.13.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.13.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.14.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.14.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.14.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.15.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.46.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.15.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.46.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.15.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.46.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.2.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.2.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.2.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.3.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.3.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.3.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.4.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.4.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.4.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.5.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.5.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.5.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.6.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.6.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.6.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.7.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.7.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.7.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.8.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.8.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.8.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.9.down_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.9.gate_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.mlp.experts.9.up_proj.weight": "pytorch_model-00074-of-00080.bin", - "model.layers.46.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.46.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.46.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.46.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.46.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.46.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.46.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.46.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.46.self_attn.k_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.self_attn.k_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.46.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.46.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.46.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.46.self_attn.q_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.self_attn.q_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.46.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.46.self_attn.v_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.46.self_attn.v_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.47.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.47.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.0.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.0.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.0.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.1.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.1.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.1.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.10.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.10.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.10.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.11.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.11.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.11.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.12.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.12.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.12.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.13.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.47.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.13.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.47.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.13.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.47.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.14.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.47.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.14.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.47.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.14.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.47.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.15.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.47.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.15.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.47.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.15.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.47.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.2.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.2.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.2.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.3.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.3.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.3.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.4.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.4.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.4.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.5.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.5.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.5.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.6.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.6.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.6.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.7.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.7.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.7.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.8.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.8.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.8.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.9.down_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.9.gate_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.47.mlp.experts.9.up_proj.weight": "pytorch_model-00075-of-00080.bin", - "model.layers.47.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.47.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.47.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.47.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.47.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.47.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.47.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.47.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.47.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.47.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.47.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.47.self_attn.q_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.47.self_attn.q_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.47.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.48.input_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.48.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.0.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.0.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.0.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.1.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.1.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.1.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.10.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.10.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.10.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.11.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.11.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.11.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.12.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.12.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.12.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.13.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.13.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.13.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.14.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.14.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.14.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.15.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.15.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.15.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.48.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.2.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.2.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.2.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.3.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.3.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.3.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.4.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.4.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.4.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.5.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.5.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.5.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.6.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.6.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.6.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.7.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.7.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.7.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.8.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.8.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.8.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.9.down_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.9.gate_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.mlp.experts.9.up_proj.weight": "pytorch_model-00016-of-00080.bin", - "model.layers.48.mlp.gate.wg.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.48.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.48.mlp.shared_mlp.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.48.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.48.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.48.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.48.mlp.shared_mlp.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.48.post_attention_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.48.self_attn.k_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.self_attn.k_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.48.self_attn.key_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.48.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.48.self_attn.o_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.48.self_attn.q_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.self_attn.q_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.48.self_attn.query_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.48.self_attn.v_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.48.self_attn.v_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.49.input_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.49.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.0.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.0.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.0.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.1.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.1.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.1.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.10.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.10.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.10.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.11.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.11.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.11.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.12.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.12.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.12.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.13.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.13.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.13.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.14.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.14.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.14.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.15.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.15.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.15.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.2.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.2.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.2.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.3.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.3.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.3.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.4.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.4.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.4.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.5.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.5.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.5.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.6.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.6.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.6.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.7.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.7.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.7.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.8.down_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.8.gate_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.8.up_proj.weight": "pytorch_model-00017-of-00080.bin", - "model.layers.49.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.9.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.9.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.49.mlp.experts.9.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.49.mlp.gate.wg.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.49.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.49.mlp.shared_mlp.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.49.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.49.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.49.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.49.mlp.shared_mlp.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.49.post_attention_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.49.self_attn.key_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.49.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.49.self_attn.o_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.49.self_attn.q_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.49.self_attn.q_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.49.self_attn.query_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.5.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.5.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.0.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.0.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.0.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.1.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.1.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.1.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.10.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.10.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.10.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.11.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.11.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.11.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.12.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.12.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.12.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.13.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.13.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.13.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.14.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.14.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.14.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.15.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.15.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.15.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.2.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.2.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.2.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.3.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.3.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.3.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.4.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.4.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.4.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.5.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.5.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.5.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.6.down_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.6.gate_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.6.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.7.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.7.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.7.up_proj.weight": "pytorch_model-00043-of-00080.bin", - "model.layers.5.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.8.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.8.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.8.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.9.down_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.9.gate_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.5.mlp.experts.9.up_proj.weight": "pytorch_model-00044-of-00080.bin", - "model.layers.5.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.5.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.5.mlp.shared_mlp.down_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.5.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.5.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.5.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.5.mlp.shared_mlp.up_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.5.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.5.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.5.self_attn.q_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.5.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.50.input_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.50.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.0.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.0.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.0.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.1.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.1.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.1.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.10.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.10.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.10.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.11.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.11.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.11.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.12.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.12.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.12.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.13.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.50.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.13.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.50.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.13.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.14.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.50.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.14.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.50.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.14.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.50.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.15.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.50.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.15.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.50.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.15.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.50.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.2.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.2.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.2.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.3.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.3.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.3.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.4.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.4.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.4.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.5.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.5.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.5.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.6.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.6.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.6.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.7.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.7.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.7.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.8.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.8.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.8.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.9.down_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.9.gate_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.mlp.experts.9.up_proj.weight": "pytorch_model-00036-of-00080.bin", - "model.layers.50.mlp.gate.wg.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.50.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.50.mlp.shared_mlp.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.50.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.50.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.50.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.50.mlp.shared_mlp.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.50.post_attention_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.50.self_attn.k_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.self_attn.k_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.50.self_attn.key_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.50.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.50.self_attn.o_proj.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.50.self_attn.q_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.self_attn.q_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.50.self_attn.query_layernorm.weight": "pytorch_model-00021-of-00080.bin", - "model.layers.50.self_attn.v_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.50.self_attn.v_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.51.input_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.51.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.0.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.0.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.0.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.1.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.1.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.1.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.10.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.10.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.10.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.11.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.11.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.11.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.12.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.12.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.12.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.13.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.13.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.13.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.14.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.14.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.14.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.15.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.15.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.15.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.51.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.2.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.2.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.2.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.3.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.3.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.3.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.4.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.4.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.4.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.5.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.5.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.5.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.6.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.6.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.6.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.7.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.7.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.7.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.8.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.8.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.8.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.9.down_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.9.gate_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.51.mlp.experts.9.up_proj.weight": "pytorch_model-00037-of-00080.bin", - "model.layers.51.mlp.gate.wg.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.51.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.51.mlp.shared_mlp.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.51.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.51.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.51.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.51.mlp.shared_mlp.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.51.post_attention_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.51.self_attn.key_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.51.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.51.self_attn.o_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.51.self_attn.q_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.51.self_attn.q_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.51.self_attn.query_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.52.input_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.52.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.0.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.0.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.0.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.1.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.1.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.1.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.10.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.10.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.10.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.11.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.11.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.11.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.12.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.12.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.12.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.13.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.52.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.13.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.52.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.13.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.14.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.52.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.14.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.52.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.14.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.52.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.15.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.52.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.15.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.52.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.15.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.52.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.2.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.2.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.2.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.3.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.3.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.3.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.4.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.4.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.4.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.5.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.5.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.5.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.6.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.6.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.6.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.7.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.7.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.7.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.8.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.8.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.8.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.9.down_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.9.gate_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.mlp.experts.9.up_proj.weight": "pytorch_model-00056-of-00080.bin", - "model.layers.52.mlp.gate.wg.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.52.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.52.mlp.shared_mlp.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.52.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.52.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.52.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.52.mlp.shared_mlp.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.52.post_attention_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.52.self_attn.k_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.self_attn.k_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.52.self_attn.key_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.52.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.52.self_attn.o_proj.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.52.self_attn.q_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.self_attn.q_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.52.self_attn.query_layernorm.weight": "pytorch_model-00041-of-00080.bin", - "model.layers.52.self_attn.v_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.52.self_attn.v_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.53.input_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.53.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.0.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.0.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.0.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.1.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.1.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.1.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.10.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.10.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.10.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.11.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.11.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.11.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.12.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.12.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.12.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.13.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.13.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.13.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.14.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.14.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.14.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.15.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.15.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.15.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.53.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.2.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.2.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.2.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.3.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.3.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.3.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.4.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.4.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.4.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.5.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.5.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.5.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.6.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.6.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.6.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.7.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.7.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.7.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.8.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.8.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.8.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.9.down_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.9.gate_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.53.mlp.experts.9.up_proj.weight": "pytorch_model-00057-of-00080.bin", - "model.layers.53.mlp.gate.wg.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.53.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.53.mlp.shared_mlp.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.53.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.53.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.53.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.53.mlp.shared_mlp.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.53.post_attention_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.53.self_attn.key_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.53.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.53.self_attn.o_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.53.self_attn.q_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.53.self_attn.q_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.53.self_attn.query_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.54.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.54.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.0.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.0.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.0.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.1.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.1.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.1.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.10.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.10.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.10.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.11.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.11.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.11.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.12.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.12.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.12.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.13.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.13.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.13.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.14.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.14.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.14.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.15.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.15.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.15.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.54.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.2.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.2.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.2.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.3.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.3.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.3.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.4.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.4.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.4.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.5.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.5.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.5.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.6.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.6.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.6.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.7.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.7.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.7.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.8.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.8.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.8.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.9.down_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.9.gate_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.mlp.experts.9.up_proj.weight": "pytorch_model-00076-of-00080.bin", - "model.layers.54.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.54.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.54.mlp.shared_mlp.down_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.54.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.54.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.54.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.54.mlp.shared_mlp.up_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.54.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.54.self_attn.k_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.self_attn.k_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.54.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.54.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.54.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.54.self_attn.q_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.self_attn.q_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.54.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.54.self_attn.v_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.54.self_attn.v_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.55.input_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.55.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.0.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.0.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.0.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.1.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.1.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.1.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.10.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.10.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.10.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.11.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.11.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.11.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.12.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.12.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.12.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.13.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.13.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.13.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.14.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.14.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.14.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.15.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.15.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.15.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.2.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.2.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.2.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.3.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.3.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.3.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.4.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.4.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.4.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.5.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.5.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.5.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.6.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.6.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.6.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.7.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.7.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.7.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.8.down_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.8.gate_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.8.up_proj.weight": "pytorch_model-00077-of-00080.bin", - "model.layers.55.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.9.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.9.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.55.mlp.experts.9.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.55.mlp.gate.wg.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.55.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.55.mlp.shared_mlp.down_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.55.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.55.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.55.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.55.mlp.shared_mlp.up_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.55.post_attention_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.55.self_attn.key_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.55.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.55.self_attn.o_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.55.self_attn.q_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.55.self_attn.q_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.55.self_attn.query_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.56.input_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.56.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.0.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.0.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.0.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.1.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.1.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.1.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.10.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.10.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.10.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.11.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.11.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.11.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.12.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.12.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.12.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.13.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.13.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.13.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.14.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.14.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.14.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.15.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.15.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.15.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.2.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.2.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.2.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.3.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.3.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.3.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.4.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.4.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.4.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.5.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.5.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.5.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.6.down_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.6.gate_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.6.up_proj.weight": "pytorch_model-00018-of-00080.bin", - "model.layers.56.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.7.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.7.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.7.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.8.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.8.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.8.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.9.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.9.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.mlp.experts.9.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.56.mlp.gate.wg.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.56.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.56.mlp.shared_mlp.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.56.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.56.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.56.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.56.mlp.shared_mlp.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.56.post_attention_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.56.self_attn.k_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.self_attn.k_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.56.self_attn.key_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.56.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.56.self_attn.o_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.56.self_attn.q_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.self_attn.q_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.56.self_attn.query_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.56.self_attn.v_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.56.self_attn.v_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.input_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.57.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.0.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.0.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.0.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.1.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.1.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.1.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.10.down_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.10.gate_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.10.up_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.11.down_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.11.gate_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.11.up_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.12.down_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.12.gate_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.12.up_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.13.down_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.13.gate_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.13.up_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.14.down_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.14.gate_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.14.up_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.15.down_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.15.gate_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.15.up_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.2.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.2.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.2.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.3.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.3.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.3.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.4.down_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.4.gate_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.4.up_proj.weight": "pytorch_model-00019-of-00080.bin", - "model.layers.57.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.5.down_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.5.gate_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.5.up_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.6.down_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.6.gate_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.6.up_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.7.down_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.7.gate_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.7.up_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.8.down_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.8.gate_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.8.up_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.9.down_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.9.gate_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.experts.9.up_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.57.mlp.gate.wg.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.57.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.57.mlp.shared_mlp.down_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.57.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.57.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.57.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.57.mlp.shared_mlp.up_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.57.post_attention_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.57.self_attn.key_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.57.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.57.self_attn.o_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.57.self_attn.q_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.57.self_attn.q_proj.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.57.self_attn.query_layernorm.weight": "pytorch_model-00002-of-00080.bin", - "model.layers.58.input_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.58.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.0.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.0.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.0.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.1.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.1.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.1.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.10.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.10.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.10.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.11.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.11.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.11.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.12.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.12.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.12.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.13.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.13.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.13.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.14.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.14.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.14.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.15.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.15.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.15.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.2.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.2.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.2.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.3.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.3.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.3.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.4.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.4.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.4.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.5.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.5.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.5.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.6.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.6.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.6.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.7.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.7.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.7.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.8.down_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.8.gate_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.8.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.9.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.9.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.58.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.mlp.experts.9.up_proj.weight": "pytorch_model-00038-of-00080.bin", - "model.layers.58.mlp.gate.wg.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.58.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.58.mlp.shared_mlp.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.58.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.58.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.58.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.58.mlp.shared_mlp.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.58.post_attention_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.58.self_attn.k_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.self_attn.k_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.58.self_attn.key_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.58.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.58.self_attn.o_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.58.self_attn.q_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.self_attn.q_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.58.self_attn.query_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.58.self_attn.v_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.58.self_attn.v_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.input_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.59.mlp.experts.0.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.0.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.0.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.0.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.0.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.0.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.1.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.1.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.1.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.1.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.1.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.1.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.10.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.10.down_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.10.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.10.gate_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.10.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.10.up_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.11.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.11.down_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.11.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.11.gate_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.11.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.11.up_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.12.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.12.down_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.12.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.12.gate_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.12.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.12.up_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.13.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.13.down_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.13.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.13.gate_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.13.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.13.up_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.14.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.14.down_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.14.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.14.gate_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.14.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.14.up_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.15.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.15.down_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.15.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.15.gate_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.15.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.15.up_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.2.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.2.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.2.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.2.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.2.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.2.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.3.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.3.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.3.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.3.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.3.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.3.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.4.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.4.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.4.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.4.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.4.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.4.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.5.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.5.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.5.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.5.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.5.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.5.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.6.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.6.down_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.6.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.6.gate_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.6.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.6.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.7.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.7.down_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.7.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.7.gate_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.7.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.7.up_proj.weight": "pytorch_model-00039-of-00080.bin", - "model.layers.59.mlp.experts.8.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.8.down_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.8.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.8.gate_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.8.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.8.up_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.9.down_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.9.down_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.9.gate_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.9.gate_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.9.up_proj.bias": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.experts.9.up_proj.weight": "pytorch_model-00040-of-00080.bin", - "model.layers.59.mlp.gate.wg.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.59.mlp.shared_mlp.down_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.59.mlp.shared_mlp.down_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.59.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.59.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.59.mlp.shared_mlp.up_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.59.mlp.shared_mlp.up_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.59.post_attention_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.59.self_attn.key_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.59.self_attn.o_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.59.self_attn.o_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.59.self_attn.q_proj.bias": "pytorch_model-00022-of-00080.bin", - "model.layers.59.self_attn.q_proj.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.59.self_attn.query_layernorm.weight": "pytorch_model-00022-of-00080.bin", - "model.layers.6.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.6.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.0.down_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.0.gate_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.0.up_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.1.down_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.1.gate_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.1.up_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.10.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.10.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.10.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.11.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.11.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.11.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.12.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.12.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.12.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.13.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.13.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.13.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.14.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.14.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.14.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.15.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.15.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.15.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.2.down_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.2.gate_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.2.up_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.3.down_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.3.gate_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.3.up_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.4.down_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.4.gate_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.4.up_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.5.down_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.5.gate_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.5.up_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.6.down_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.6.gate_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.6.up_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.7.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.7.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.7.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.8.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.8.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.8.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.9.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.9.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.mlp.experts.9.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.6.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.6.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.6.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.6.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.6.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.6.self_attn.k_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.6.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.6.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.6.self_attn.q_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.6.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.6.self_attn.v_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.60.input_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.60.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.0.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.0.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.0.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.1.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.1.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.1.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.10.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.10.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.10.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.11.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.11.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.11.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.12.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.12.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.12.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.13.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.13.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.13.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.14.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.14.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.14.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.15.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.15.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.15.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.2.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.2.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.2.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.3.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.3.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.3.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.4.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.4.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.4.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.5.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.5.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.5.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.6.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.6.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.6.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.7.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.7.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.7.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.8.down_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.8.gate_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.8.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.9.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.9.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.60.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.mlp.experts.9.up_proj.weight": "pytorch_model-00058-of-00080.bin", - "model.layers.60.mlp.gate.wg.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.60.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.60.mlp.shared_mlp.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.60.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.60.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.60.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.60.mlp.shared_mlp.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.60.post_attention_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.60.self_attn.k_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.self_attn.k_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.60.self_attn.key_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.60.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.60.self_attn.o_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.60.self_attn.q_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.self_attn.q_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.60.self_attn.query_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.60.self_attn.v_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.60.self_attn.v_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.input_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.61.mlp.experts.0.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.0.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.0.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.0.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.0.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.0.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.1.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.1.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.1.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.1.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.1.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.1.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.10.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.10.down_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.10.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.10.gate_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.10.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.10.up_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.11.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.11.down_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.11.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.11.gate_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.11.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.11.up_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.12.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.12.down_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.12.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.12.gate_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.12.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.12.up_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.13.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.13.down_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.13.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.13.gate_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.13.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.13.up_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.14.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.14.down_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.14.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.14.gate_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.14.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.14.up_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.15.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.15.down_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.15.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.15.gate_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.15.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.15.up_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.2.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.2.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.2.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.2.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.2.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.2.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.3.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.3.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.3.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.3.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.3.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.3.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.4.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.4.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.4.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.4.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.4.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.4.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.5.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.5.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.5.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.5.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.5.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.5.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.6.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.6.down_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.6.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.6.gate_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.6.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.6.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.7.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.7.down_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.7.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.7.gate_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.7.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.7.up_proj.weight": "pytorch_model-00059-of-00080.bin", - "model.layers.61.mlp.experts.8.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.8.down_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.8.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.8.gate_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.8.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.8.up_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.9.down_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.9.down_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.9.gate_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.9.gate_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.9.up_proj.bias": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.experts.9.up_proj.weight": "pytorch_model-00060-of-00080.bin", - "model.layers.61.mlp.gate.wg.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.61.mlp.shared_mlp.down_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.61.mlp.shared_mlp.down_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.61.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.61.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.61.mlp.shared_mlp.up_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.61.mlp.shared_mlp.up_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.61.post_attention_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.61.self_attn.key_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.61.self_attn.o_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.61.self_attn.o_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.61.self_attn.q_proj.bias": "pytorch_model-00042-of-00080.bin", - "model.layers.61.self_attn.q_proj.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.61.self_attn.query_layernorm.weight": "pytorch_model-00042-of-00080.bin", - "model.layers.62.input_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.62.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.0.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.0.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.0.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.1.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.1.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.1.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.10.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.10.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.10.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.11.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.11.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.11.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.12.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.12.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.12.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.13.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.13.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.13.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.14.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.14.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.14.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.15.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.15.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.15.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.2.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.2.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.2.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.3.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.3.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.3.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.4.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.4.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.4.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.5.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.5.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.5.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.6.down_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.6.gate_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.6.up_proj.weight": "pytorch_model-00078-of-00080.bin", - "model.layers.62.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.7.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.7.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.7.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.8.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.8.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.8.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.9.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.9.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.mlp.experts.9.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.62.mlp.gate.wg.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.62.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.62.mlp.shared_mlp.down_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.62.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.62.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.62.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.62.mlp.shared_mlp.up_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.62.post_attention_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.62.self_attn.k_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.self_attn.k_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.62.self_attn.key_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.62.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.62.self_attn.o_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.62.self_attn.q_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.self_attn.q_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.62.self_attn.query_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.62.self_attn.v_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.62.self_attn.v_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.input_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.63.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.0.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.0.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.0.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.1.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.1.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.1.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.10.down_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.10.gate_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.10.up_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.11.down_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.11.gate_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.11.up_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.12.down_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.12.gate_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.12.up_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.13.down_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.13.gate_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.13.up_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.14.down_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.14.gate_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.14.up_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.15.down_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.15.gate_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.15.up_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.2.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.2.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.2.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.3.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.3.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.3.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.4.down_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.4.gate_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.4.up_proj.weight": "pytorch_model-00079-of-00080.bin", - "model.layers.63.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.5.down_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.5.gate_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.5.up_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.6.down_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.6.gate_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.6.up_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.7.down_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.7.gate_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.7.up_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.8.down_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.8.gate_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.8.up_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.9.down_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.9.gate_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.experts.9.up_proj.weight": "pytorch_model-00080-of-00080.bin", - "model.layers.63.mlp.gate.wg.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.63.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.63.mlp.shared_mlp.down_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.63.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.63.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.63.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.63.mlp.shared_mlp.up_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.63.post_attention_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.63.self_attn.key_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.63.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.63.self_attn.o_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.63.self_attn.q_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.63.self_attn.q_proj.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.63.self_attn.query_layernorm.weight": "pytorch_model-00062-of-00080.bin", - "model.layers.7.input_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.7.mlp.experts.0.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.0.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.0.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.0.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.0.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.0.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.1.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.1.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.1.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.1.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.1.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.1.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.10.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.10.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.10.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.10.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.10.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.10.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.11.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.11.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.11.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.11.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.11.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.11.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.12.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.12.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.12.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.12.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.12.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.12.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.13.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.13.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.13.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.13.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.13.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.13.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.14.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.14.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.14.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.14.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.14.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.14.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.15.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.15.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.15.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.15.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.15.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.15.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.2.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.2.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.2.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.2.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.2.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.2.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.3.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.3.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.3.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.3.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.3.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.3.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.4.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.4.down_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.4.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.4.gate_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.4.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.4.up_proj.weight": "pytorch_model-00063-of-00080.bin", - "model.layers.7.mlp.experts.5.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.5.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.5.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.5.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.5.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.5.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.6.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.6.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.6.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.6.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.6.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.6.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.7.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.7.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.7.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.7.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.7.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.7.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.8.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.8.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.8.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.8.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.8.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.8.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.9.down_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.9.down_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.9.gate_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.9.gate_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.experts.9.up_proj.bias": "pytorch_model-00080-of-00080.bin", - "model.layers.7.mlp.experts.9.up_proj.weight": "pytorch_model-00064-of-00080.bin", - "model.layers.7.mlp.gate.wg.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.7.mlp.shared_mlp.down_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.7.mlp.shared_mlp.down_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.7.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.7.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.7.mlp.shared_mlp.up_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.7.mlp.shared_mlp.up_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.7.self_attn.key_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.7.self_attn.o_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.7.self_attn.q_proj.bias": "pytorch_model-00062-of-00080.bin", - "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.7.self_attn.query_layernorm.weight": "pytorch_model-00061-of-00080.bin", - "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.8.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.0.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.8.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.0.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.8.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.0.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.8.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.1.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.8.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.1.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.8.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.1.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.8.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.10.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.10.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.10.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.11.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.11.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.11.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.12.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.12.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.12.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.13.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.13.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.13.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.14.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.14.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.14.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.15.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.15.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.15.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.2.down_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.8.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.2.gate_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.8.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.2.up_proj.weight": "pytorch_model-00004-of-00080.bin", - "model.layers.8.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.3.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.3.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.3.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.4.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.4.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.4.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.5.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.5.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.5.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.6.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.6.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.6.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.7.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.7.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.7.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.8.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.8.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.8.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.9.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.9.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.mlp.experts.9.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.8.mlp.gate.wg.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.8.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.8.mlp.shared_mlp.down_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.8.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.8.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.8.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.8.mlp.shared_mlp.up_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.8.self_attn.k_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.8.self_attn.key_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.8.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.8.self_attn.q_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.8.self_attn.query_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.8.self_attn.v_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00020-of-00080.bin", - "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.9.mlp.experts.0.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.0.down_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.9.mlp.experts.0.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.0.gate_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.9.mlp.experts.0.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.0.up_proj.weight": "pytorch_model-00005-of-00080.bin", - "model.layers.9.mlp.experts.1.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.1.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.1.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.1.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.1.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.1.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.10.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.10.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.10.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.10.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.10.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.10.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.11.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.11.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.11.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.11.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.11.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.11.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.12.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.12.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.12.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.12.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.12.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.12.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.13.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.13.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.13.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.13.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.13.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.13.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.14.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.14.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.14.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.14.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.14.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.14.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.15.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.15.down_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.9.mlp.experts.15.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.15.gate_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.9.mlp.experts.15.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.15.up_proj.weight": "pytorch_model-00007-of-00080.bin", - "model.layers.9.mlp.experts.2.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.2.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.2.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.2.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.2.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.2.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.3.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.3.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.3.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.3.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.3.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.3.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.4.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.4.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.4.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.4.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.4.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.4.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.5.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.5.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.5.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.5.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.5.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.5.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.6.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.6.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.6.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.6.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.6.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.6.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.7.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.7.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.7.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.7.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.7.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.7.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.8.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.8.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.8.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.8.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.8.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.8.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.9.down_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.9.down_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.9.gate_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.9.gate_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.experts.9.up_proj.bias": "pytorch_model-00020-of-00080.bin", - "model.layers.9.mlp.experts.9.up_proj.weight": "pytorch_model-00006-of-00080.bin", - "model.layers.9.mlp.gate.wg.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.9.mlp.shared_mlp.down_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.9.mlp.shared_mlp.down_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.9.mlp.shared_mlp.gate_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.9.mlp.shared_mlp.gate_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.9.mlp.shared_mlp.up_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.9.mlp.shared_mlp.up_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.9.self_attn.key_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.9.self_attn.o_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.9.self_attn.q_proj.bias": "pytorch_model-00002-of-00080.bin", - "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00080.bin", - "model.layers.9.self_attn.query_layernorm.weight": "pytorch_model-00001-of-00080.bin", - "model.norm.weight": "pytorch_model-00062-of-00080.bin" - } -}