diff --git a/pytorch_model-00000.bin b/pytorch_model-00000.bin
new file mode 100644
index 0000000000000000000000000000000000000000..562e9f1decc98d4f8384da379840e1d9e88a0376
--- /dev/null
+++ b/pytorch_model-00000.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1ebfec1bfb45a728e89a5acf8b2b56ea12129c146f9d051ecd727d22441f437
+size 9663707876
diff --git a/pytorch_model-00001.bin b/pytorch_model-00001.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6057d93e964e1caff523798284efed91676ecd4e
--- /dev/null
+++ b/pytorch_model-00001.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3eddd2772437565de62951a7793332cc11afa7a72626d29afdd52fa01cf58acf
+size 9840143683
diff --git a/pytorch_model-00002.bin b/pytorch_model-00002.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0337a2a41046a41ab0c89e1ae9d14c5fcfd1b569
--- /dev/null
+++ b/pytorch_model-00002.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4953e7a459251465f90140926bd1b8c22a86c3a1d3134983a0bb7cb441d430d
+size 9840143683
diff --git a/pytorch_model-00003.bin b/pytorch_model-00003.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9b00be22e623235f5955665237e69a3a05aeecfd
--- /dev/null
+++ b/pytorch_model-00003.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5701894871b3bc9942ebfbc5510374f92ea35dfd585f1368dea92de7f8b3dcbd
+size 9840143683
diff --git a/pytorch_model-00004.bin b/pytorch_model-00004.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cf97017c1cbca65f99aeec179bc21e73f09acb4a
--- /dev/null
+++ b/pytorch_model-00004.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f86699f22b3fb8cef1c932d405d3427c260b5e0efd5e768d4e9206a3bfd20b3
+size 9840143683
diff --git a/pytorch_model-00005.bin b/pytorch_model-00005.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ec5429d166dc17998bf4145567df75ce5f7b906d
--- /dev/null
+++ b/pytorch_model-00005.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9eb41985a50a5d692f0445f3265cb3ad459bf39a307db634383b51051c9916dd
+size 9840143683
diff --git a/pytorch_model-00006.bin b/pytorch_model-00006.bin
new file mode 100644
index 0000000000000000000000000000000000000000..18e1d2118c99bc9e43d487863475ee156df2c7e2
--- /dev/null
+++ b/pytorch_model-00006.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:881aacf19c7f19d96aed6d8fa5acda0eed513e13003a2f3128f64a216fe712da
+size 9840143683
diff --git a/pytorch_model-00007.bin b/pytorch_model-00007.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8350b3e0bf956e2c0d17e1488655e7d3e8bd55c4
--- /dev/null
+++ b/pytorch_model-00007.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7cfe7783f6975328fb4bd4ecc929073c917e126d95b62cbb86093c31cacffaa
+size 9840143683
diff --git a/pytorch_model-00008.bin b/pytorch_model-00008.bin
new file mode 100644
index 0000000000000000000000000000000000000000..944a8146d2f2589f961df02d7bf9583fdebef40b
--- /dev/null
+++ b/pytorch_model-00008.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf081eb2af50d546a336b1da9294b662b972e016849a74bf4fa9f57bc349b929
+size 9840143683
diff --git a/pytorch_model-00009.bin b/pytorch_model-00009.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b108c98ea021362d76cb2c5836e01667be49d905
--- /dev/null
+++ b/pytorch_model-00009.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b71b6608a4c452e1e6fa3b4cc7a7b8f545baa92fa353418fa361a26accb33a14
+size 9840143683
diff --git a/pytorch_model-00010.bin b/pytorch_model-00010.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5c08bac80cc4cd9f0364f6bf70e3e85e06ef37d7
--- /dev/null
+++ b/pytorch_model-00010.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25d5a59228667fc5f8be9209126e231c1f4ffd28d33cee72d8fa11e5d5e9648a
+size 9840143683
diff --git a/pytorch_model-00011.bin b/pytorch_model-00011.bin
new file mode 100644
index 0000000000000000000000000000000000000000..498ea2b2d4cc27b53ba43edc87caad6887b6c7b4
--- /dev/null
+++ b/pytorch_model-00011.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9757edda1b39af71913a54dd8dbf94cc25f87301225ddb0de91aea1585905ca9
+size 9840143683
diff --git a/pytorch_model-00012.bin b/pytorch_model-00012.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5d697496e5af923f484920a82915f92054c20444
--- /dev/null
+++ b/pytorch_model-00012.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b444ccc5cf94ed6aa4c029f64173b40e21549fbe197fd207ac3a113a7ac5ca85
+size 9840143683
diff --git a/pytorch_model-00013.bin b/pytorch_model-00013.bin
new file mode 100644
index 0000000000000000000000000000000000000000..dbd6d4fc16145ea4987f068e19c36e3ab04a8233
--- /dev/null
+++ b/pytorch_model-00013.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bb7d8c8b5903eafa97796001291edbd42b0478d7aca4b62cdd3d185e131fe54
+size 9840143683
diff --git a/pytorch_model-00014.bin b/pytorch_model-00014.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b56ae520fa0f4f0e2dd1253911b9678ebba756a3
--- /dev/null
+++ b/pytorch_model-00014.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d66499d0087e7ac3c843c96906d91b9b903d0fce589468c150a60c62042ab4d
+size 9840143683
diff --git a/pytorch_model-00015.bin b/pytorch_model-00015.bin
new file mode 100644
index 0000000000000000000000000000000000000000..53ea25468e0a15378519b4a8aba51a5f96f060f0
--- /dev/null
+++ b/pytorch_model-00015.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:116aa3213e8f99fe8b05b05e09087726e21b447af7e12031a6c13002249c320a
+size 9840143683
diff --git a/pytorch_model-00016.bin b/pytorch_model-00016.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d1f54adbb152df9de965405251372ceb64621c7f
--- /dev/null
+++ b/pytorch_model-00016.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05bc318f6c1df94f26e0747e597d48360d12a0406f9d6a12fc8e1d323876c2de
+size 9840143683
diff --git a/pytorch_model-00017.bin b/pytorch_model-00017.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bbcb667f7d872ef2e2dd913d3f688b7c35749ed2
--- /dev/null
+++ b/pytorch_model-00017.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:717ee5b5f0ad7153b2ec3f67a6c1fa1be129d81de63f23e917df45663cfabbdf
+size 9840143683
diff --git a/pytorch_model-00018.bin b/pytorch_model-00018.bin
new file mode 100644
index 0000000000000000000000000000000000000000..54c3806b9183cbfe21b8b7072bce35d39a782727
--- /dev/null
+++ b/pytorch_model-00018.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d74183b6b0dc258978798522ef8322241c8acd65e10bc9f6475753819375e7af
+size 9840143683
diff --git a/pytorch_model-00019.bin b/pytorch_model-00019.bin
new file mode 100644
index 0000000000000000000000000000000000000000..747bfa967bdaf7d523d2ea507e236f60132395a0
--- /dev/null
+++ b/pytorch_model-00019.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa6691c698f33965f0bb7f23b74ef8190f52988b31eed7e38504f6888c1be00a
+size 9840143683
diff --git a/pytorch_model-00020.bin b/pytorch_model-00020.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7beb2c6b3ea5987cfdebfe13b813e5e7e5e27ed3
--- /dev/null
+++ b/pytorch_model-00020.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb8e160a2277666132649c41c0c5146289564dc0c3fdd1a51385cd0112368509
+size 9840143683
diff --git a/pytorch_model-00021.bin b/pytorch_model-00021.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7aeab3e2736add7412c94386fb3b15255e9a48f5
--- /dev/null
+++ b/pytorch_model-00021.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7eb5bd1c19a136649681b37ac3db320d7248ad99f1fb36a45676cff8239f8f5c
+size 9840143683
diff --git a/pytorch_model-00022.bin b/pytorch_model-00022.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f646c9c691ee8660e8eb44dd686473725af990de
--- /dev/null
+++ b/pytorch_model-00022.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f391050be2cea0d44570f335742ff33a1501f3b142894a69e7ac7d5faaf9f26
+size 9840143683
diff --git a/pytorch_model-00023.bin b/pytorch_model-00023.bin
new file mode 100644
index 0000000000000000000000000000000000000000..125ca2c5d84f7e5109db1b84026704ccc9bc661a
--- /dev/null
+++ b/pytorch_model-00023.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d039c8ae1401c5dca1d344fbfba4d9d29477997b33b6e864dfb7c922afb43b2e
+size 9840143683
diff --git a/pytorch_model-00024.bin b/pytorch_model-00024.bin
new file mode 100644
index 0000000000000000000000000000000000000000..70576572a713ba51853f714ee227c3a25bbf5a48
--- /dev/null
+++ b/pytorch_model-00024.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0b66750b47383a1826002523aa2499e44b858f54b9d15a1d2942f1e1b5baec3
+size 9840143683
diff --git a/pytorch_model-00025.bin b/pytorch_model-00025.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a12080b2212df59461cdd6a23a1e59a4e1d1baf0
--- /dev/null
+++ b/pytorch_model-00025.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3d9d5dbf6e5c1fcff0d714fdfda97cc310aedf612d2c7cb5875df7d595894eb
+size 9840143683
diff --git a/pytorch_model-00026.bin b/pytorch_model-00026.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3fad9706d6ed10f45c0401bcbabef104e0e4410a
--- /dev/null
+++ b/pytorch_model-00026.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f182db161b001c5808fa8d74c09d95b1292ad576e78e7d6fc96e41b251a08444
+size 9840143683
diff --git a/pytorch_model-00027.bin b/pytorch_model-00027.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d345e886b02bcc34b199aa33c4989cfc4c0dec04
--- /dev/null
+++ b/pytorch_model-00027.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2454b8cc6435dcf9a8659d4a23c9fc3acd6adfdc55567ede49c55023e110908
+size 9840143683
diff --git a/pytorch_model-00028.bin b/pytorch_model-00028.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3a386fcdde81f6bc58ccaeeff385008e4c4251a0
--- /dev/null
+++ b/pytorch_model-00028.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26e5a3d65dcf5542884d1cc6f70163449bee31c5de9a8fedb9cca4a5e227e694
+size 9840143683
diff --git a/pytorch_model-00029.bin b/pytorch_model-00029.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d9ca27e4d109cddf0a0eec29b110e90bf5fce414
--- /dev/null
+++ b/pytorch_model-00029.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f59c8ca224f1390d736ad52cf20ae29c0d749fe7d84b6ba54d064179c3054187
+size 9840143683
diff --git a/pytorch_model-00030.bin b/pytorch_model-00030.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ded5b482d34b7a8d412506c90d29b04447185457
--- /dev/null
+++ b/pytorch_model-00030.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51d330a8050aac205b66086303fb185244e086277ecd247c959d13117d8fd851
+size 9840143683
diff --git a/pytorch_model-00031.bin b/pytorch_model-00031.bin
new file mode 100644
index 0000000000000000000000000000000000000000..51163ea0faff6c0f1bcadb63d792806f1c200d5a
--- /dev/null
+++ b/pytorch_model-00031.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8a210180da371bc05d319507ef046a7624d2b5e2566dd58f61a3f7c688d198e
+size 9840143683
diff --git a/pytorch_model-00032.bin b/pytorch_model-00032.bin
new file mode 100644
index 0000000000000000000000000000000000000000..03e7852c71b7430959fa3b98e6223ae80ef9d209
--- /dev/null
+++ b/pytorch_model-00032.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93c5855f60aa89056162392fdba96e5ae8f40965be91ee05c096e4027f8832c1
+size 9840143683
diff --git a/pytorch_model-00033.bin b/pytorch_model-00033.bin
new file mode 100644
index 0000000000000000000000000000000000000000..adee930855e23a5a5806c27912352083d32e5855
--- /dev/null
+++ b/pytorch_model-00033.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:934779e8487370790f44250b00cca5d56e65132ef9d896caac408a10183cb5d0
+size 9840143683
diff --git a/pytorch_model-00034.bin b/pytorch_model-00034.bin
new file mode 100644
index 0000000000000000000000000000000000000000..664ea7f96a96d96603f9c92ae440752e40e90266
--- /dev/null
+++ b/pytorch_model-00034.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:088c42ece18a8f0447cb7723f7d1fd367bfb3d117d01a35e88c48a7e1582b064
+size 9840143683
diff --git a/pytorch_model-00035.bin b/pytorch_model-00035.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4fceeff935eb2f77fef6589f0c1c52175272f52f
--- /dev/null
+++ b/pytorch_model-00035.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0de65e13cfe8bf054bbef7dbb56b24f3c9cdb1e51d45d8ad0b5413147f443b17
+size 9840143683
diff --git a/pytorch_model-00036.bin b/pytorch_model-00036.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1ec065db1e05e1e1ffe3bc5f1be07c8bf791acce
--- /dev/null
+++ b/pytorch_model-00036.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:131a8e1afc541dedd5a9387ed40a1d8d05010457d0e32b5c61d6415b75e94fbd
+size 9840143683
diff --git a/pytorch_model-00037.bin b/pytorch_model-00037.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fc4e55705efa0f0e91993c4c9ad0156615f4d6de
--- /dev/null
+++ b/pytorch_model-00037.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33db199d6283342b306515936990cf02a7cb117231c8e587b3a6ec4a9da67ea9
+size 9840143683
diff --git a/pytorch_model-00038.bin b/pytorch_model-00038.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c5d59285ab5fbe597aa098783db8116e135bf80e
--- /dev/null
+++ b/pytorch_model-00038.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b41c6e88ea9e29cdf4acdda59fc968544ff7340ae4746f4fd75d5b926c37c63
+size 9840143683
diff --git a/pytorch_model-00039.bin b/pytorch_model-00039.bin
new file mode 100644
index 0000000000000000000000000000000000000000..111d527b613e82e2ef259c5f8445ecca6a6fe713
--- /dev/null
+++ b/pytorch_model-00039.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69ba81883191cf34a7b5dba4e70a6c8fc8aeef815429a1475529d4b08376440c
+size 9840143683
diff --git a/pytorch_model-00040.bin b/pytorch_model-00040.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3f791d123942cd1e430fde8c506f4acddcc5f08c
--- /dev/null
+++ b/pytorch_model-00040.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:120d68928227bad851911642676d14baf0bd1b3e5232d316b898b878be38bca2
+size 9840143683
diff --git a/pytorch_model-00041.bin b/pytorch_model-00041.bin
new file mode 100644
index 0000000000000000000000000000000000000000..909232ec2ee7ccfe9ab9cac6b166ce9c95abe0ac
--- /dev/null
+++ b/pytorch_model-00041.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cf6b59003d83507f2503d2618f1cd3ef85de6fa94780dc9a5df44224e5f95a4
+size 9840143683
diff --git a/pytorch_model-00042.bin b/pytorch_model-00042.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cde74d7b960cc3a2f45242b9adaa4834d599ecf5
--- /dev/null
+++ b/pytorch_model-00042.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83fc8ce648f6aee36c2aa265c833d125d0e36ab8560147739b945ee3943eb101
+size 9840143683
diff --git a/pytorch_model-00043.bin b/pytorch_model-00043.bin
new file mode 100644
index 0000000000000000000000000000000000000000..952c78d56d9d165847216037c6b608d148dfc45c
--- /dev/null
+++ b/pytorch_model-00043.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1a555172f8b31fcedcd0bd5c5c0b3ca64bcf016c2f2edc9cb86d5a6e5c40dab
+size 9840143683
diff --git a/pytorch_model-00044.bin b/pytorch_model-00044.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9eb679964af59960ee646db9b1559dda2fc44fd7
--- /dev/null
+++ b/pytorch_model-00044.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:942e946cf569c3a9da11520aca2c47effbde4c6aa1ea9d41f460b28dc546b78c
+size 9840143683
diff --git a/pytorch_model-00045.bin b/pytorch_model-00045.bin
new file mode 100644
index 0000000000000000000000000000000000000000..38dc2b3497d6ca8a70b57d9502e1b8e30c3f733a
--- /dev/null
+++ b/pytorch_model-00045.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69056d3ec7c9af4b3b5491bc095e2403e4f179ea0e46a477bcf90f2e80fd8cbe
+size 9840143683
diff --git a/pytorch_model-00046.bin b/pytorch_model-00046.bin
new file mode 100644
index 0000000000000000000000000000000000000000..69f71f3d9dab051429501b79f25bc8e378dfa535
--- /dev/null
+++ b/pytorch_model-00046.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:943651b294efc62fa2998ecc8adf0c058f2562b6cc33b130e995fd0bd14e5eff
+size 9840143683
diff --git a/pytorch_model-00047.bin b/pytorch_model-00047.bin
new file mode 100644
index 0000000000000000000000000000000000000000..780ac146fdcab8a124ead9933ae30ac0d8fd9264
--- /dev/null
+++ b/pytorch_model-00047.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:043755ad867656b6bef6a7e59c196f2558b7e9b43a958fa8ba7257ab8db0cd21
+size 9840143683
diff --git a/pytorch_model-00048.bin b/pytorch_model-00048.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7d9132e82948a309b7d39bc9a35ff20f3248a37a
--- /dev/null
+++ b/pytorch_model-00048.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb517cf536af4c135d53f43b31521edb662eeafe8deb86a2416eb067033265be
+size 9840143683
diff --git a/pytorch_model-00049.bin b/pytorch_model-00049.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0702e972c5a98db9540ef8334641b5a85ecfa9d0
--- /dev/null
+++ b/pytorch_model-00049.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f433b5433992633ee55b98291abb2155e5291cc055a69aa62c94e12c8350960
+size 9840143683
diff --git a/pytorch_model-00050.bin b/pytorch_model-00050.bin
new file mode 100644
index 0000000000000000000000000000000000000000..84c3d0e6b5326abb815e683f194c1c5a7f267419
--- /dev/null
+++ b/pytorch_model-00050.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d02bc3d2f6ebea617b7e270848e68285555c64fd79c8afcec20295b0b4524186
+size 9840143683
diff --git a/pytorch_model-00051.bin b/pytorch_model-00051.bin
new file mode 100644
index 0000000000000000000000000000000000000000..50e70531090562d566c8cc35583e274295ee22e8
--- /dev/null
+++ b/pytorch_model-00051.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ece9310dd154221635162b4d81bc32e5f647f8cef8b9c021e44d4d773a5ee30
+size 9840143683
diff --git a/pytorch_model-00052.bin b/pytorch_model-00052.bin
new file mode 100644
index 0000000000000000000000000000000000000000..352116f3786ed8ede3413aa1e27ac61e13e7fe73
--- /dev/null
+++ b/pytorch_model-00052.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d74bd5ae9c7514cf682ea05cfd4a9d64b8a02f9b4626baf36a7baeb01b2269c
+size 9840143683
diff --git a/pytorch_model-00053.bin b/pytorch_model-00053.bin
new file mode 100644
index 0000000000000000000000000000000000000000..18d230528dd3cada9430ef894e14b757e2cd377e
--- /dev/null
+++ b/pytorch_model-00053.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcab1af7c27662b44a65e36009e635fad76d98ea31229f094a6fb66df1824d72
+size 9840143683
diff --git a/pytorch_model-00054.bin b/pytorch_model-00054.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3575511059b67e68913b156534e9b85e6e287d81
--- /dev/null
+++ b/pytorch_model-00054.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3169d2069e543abed9dfcfc2df8880f43367e59d8816d5f514819387d13a35d6
+size 9840143683
diff --git a/pytorch_model-00055.bin b/pytorch_model-00055.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0f1f9296053698a54c6231c3a82349568e302bc5
--- /dev/null
+++ b/pytorch_model-00055.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e54898e1ec96e6ec0ad98081c7f9b31deeb99325c79f1796237e99f5e33ecd9
+size 9840143683
diff --git a/pytorch_model-00056.bin b/pytorch_model-00056.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2245eacae71beb1b2f51d990b5ea944561fdfd31
--- /dev/null
+++ b/pytorch_model-00056.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8414ba7f9cdea79c184a5be63e6ef98f49a62e720b9c02e245b6c2146cc1e6e
+size 9840143683
diff --git a/pytorch_model-00057.bin b/pytorch_model-00057.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e285e27f00e13feec5341dddf65a4f6fb2cf4c6a
--- /dev/null
+++ b/pytorch_model-00057.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3c3b2bd06ba5c1f8c3fc5f123bb002b9717a71a2a3af7905c43fb04c2f410b8
+size 9840143683
diff --git a/pytorch_model-00058.bin b/pytorch_model-00058.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f963349d455421ab1f7ce15ff9ec039777be09f0
--- /dev/null
+++ b/pytorch_model-00058.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:659d883fd4981037aee7883e37486ec67708eb3b21d10e0d1afff73fc20b2fcb
+size 9840143683
diff --git a/pytorch_model-00059.bin b/pytorch_model-00059.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c4f5d03dbede908ce96189be0329cedbb237251d
--- /dev/null
+++ b/pytorch_model-00059.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f3f5316638328e319ffbeafe61278da4ac2da790aff77526eac153c6b7d9841
+size 9840143683
diff --git a/pytorch_model-00060.bin b/pytorch_model-00060.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4c528af81015b5f4d3f99b5252cfddf040843b19
--- /dev/null
+++ b/pytorch_model-00060.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39bc1c21a9959cc868dd7e6e4ef6833a7e7a6c88f5413e0c0ce9229b99fc956d
+size 9840143683
diff --git a/pytorch_model-00061.bin b/pytorch_model-00061.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5f83e029c3ae4fbfe121da64acf584279507c338
--- /dev/null
+++ b/pytorch_model-00061.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10c3981d9efaead619b905babae74d55c58cefb65ad2bfb6bb5e1d872ba27cad
+size 9840143683
diff --git a/pytorch_model-00062.bin b/pytorch_model-00062.bin
new file mode 100644
index 0000000000000000000000000000000000000000..73aed81021ae95f2e46c75c9847c5cfffe49b621
--- /dev/null
+++ b/pytorch_model-00062.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd14252abcbb445bc477e147a159e6fcd974f11b38ec34d07d771881b39a1589
+size 9840143683
diff --git a/pytorch_model-00063.bin b/pytorch_model-00063.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3121108570ea6646f687e89eeeac0bf160a0bbf4
--- /dev/null
+++ b/pytorch_model-00063.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51761cd493c24ebdca2ec1e4dd0e73c7d5d0fea6036d2ca799d6ecbd3a81b1b7
+size 9840143683
diff --git a/pytorch_model-00064.bin b/pytorch_model-00064.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1ea82809d3d6b303a2521cb5504b8621fe51ba85
--- /dev/null
+++ b/pytorch_model-00064.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e273da1d7074096b7bc743a63cb826173605994191fd5ffe03f60b99eb76d0ce
+size 3397687391
diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..668b442f108f959509e7d5a628833f2280e226af
--- /dev/null
+++ b/pytorch_model.bin.index.json
@@ -0,0 +1,2122 @@
+{
+  "metadata": {
+    "total_size": 632989704192
+  },
+  "weight_map": {
+    "model.embed_tokens.weight": "pytorch_model-00000.bin",
+    "lm_head.weight": "pytorch_model-00000.bin",
+    "model.norm.scale": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.0.linear.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.1.linear.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.2.linear.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.3.linear.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.4.linear.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.5.linear.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.6.linear.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.7.linear.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.0.linear_1.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.1.linear_1.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.2.linear_1.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.3.linear_1.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.4.linear_1.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.5.linear_1.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.6.linear_1.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.7.linear_1.weight": "pytorch_model-00000.bin",
+    "model.layers.0.moe_block.experts.0.linear_v.weight": "pytorch_model-00001.bin",
+    "model.layers.0.moe_block.experts.1.linear_v.weight": "pytorch_model-00001.bin",
+    "model.layers.0.moe_block.experts.2.linear_v.weight": "pytorch_model-00001.bin",
+    "model.layers.0.moe_block.experts.3.linear_v.weight": "pytorch_model-00001.bin",
+    "model.layers.0.moe_block.experts.4.linear_v.weight": "pytorch_model-00001.bin",
+    "model.layers.0.moe_block.experts.5.linear_v.weight": "pytorch_model-00001.bin",
+    "model.layers.0.moe_block.experts.6.linear_v.weight": "pytorch_model-00001.bin",
+    "model.layers.0.moe_block.experts.7.linear_v.weight": "pytorch_model-00001.bin",
+    "model.layers.0.attn.k_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.0.attn.o_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.0.attn.q_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.0.attn.v_proj.weight": "pytorch_model-00001.bin",
+    "model.layers.0.pre_attn_norm.scale": "pytorch_model-00001.bin",
+    "model.layers.0.post_attn_norm.scale": "pytorch_model-00001.bin",
+    "model.layers.0.pre_moe_norm.scale": "pytorch_model-00001.bin",
+    "model.layers.0.post_moe_norm.scale": "pytorch_model-00001.bin",
+    "model.layers.0.moe_block.gate.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.0.linear.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.1.linear.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.2.linear.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.3.linear.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.4.linear.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.5.linear.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.6.linear.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.7.linear.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.0.linear_1.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.1.linear_1.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.2.linear_1.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.3.linear_1.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.4.linear_1.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.5.linear_1.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.6.linear_1.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.7.linear_1.weight": "pytorch_model-00001.bin",
+    "model.layers.1.moe_block.experts.0.linear_v.weight": "pytorch_model-00002.bin",
+    "model.layers.1.moe_block.experts.1.linear_v.weight": "pytorch_model-00002.bin",
+    "model.layers.1.moe_block.experts.2.linear_v.weight": "pytorch_model-00002.bin",
+    "model.layers.1.moe_block.experts.3.linear_v.weight": "pytorch_model-00002.bin",
+    "model.layers.1.moe_block.experts.4.linear_v.weight": "pytorch_model-00002.bin",
+    "model.layers.1.moe_block.experts.5.linear_v.weight": "pytorch_model-00002.bin",
+    "model.layers.1.moe_block.experts.6.linear_v.weight": "pytorch_model-00002.bin",
+    "model.layers.1.moe_block.experts.7.linear_v.weight": "pytorch_model-00002.bin",
+    "model.layers.1.attn.k_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.1.attn.o_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.1.attn.q_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.1.attn.v_proj.weight": "pytorch_model-00002.bin",
+    "model.layers.1.pre_attn_norm.scale": "pytorch_model-00002.bin",
+    "model.layers.1.post_attn_norm.scale": "pytorch_model-00002.bin",
+    "model.layers.1.pre_moe_norm.scale": "pytorch_model-00002.bin",
+    "model.layers.1.post_moe_norm.scale": "pytorch_model-00002.bin",
+    "model.layers.1.moe_block.gate.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.0.linear.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.1.linear.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.2.linear.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.3.linear.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.4.linear.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.5.linear.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.6.linear.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.7.linear.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.0.linear_1.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.1.linear_1.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.2.linear_1.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.3.linear_1.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.4.linear_1.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.5.linear_1.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.6.linear_1.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.7.linear_1.weight": "pytorch_model-00002.bin",
+    "model.layers.10.moe_block.experts.0.linear_v.weight": "pytorch_model-00003.bin",
+    "model.layers.10.moe_block.experts.1.linear_v.weight": "pytorch_model-00003.bin",
+    "model.layers.10.moe_block.experts.2.linear_v.weight": "pytorch_model-00003.bin",
+    "model.layers.10.moe_block.experts.3.linear_v.weight": "pytorch_model-00003.bin",
+    "model.layers.10.moe_block.experts.4.linear_v.weight": "pytorch_model-00003.bin",
+    "model.layers.10.moe_block.experts.5.linear_v.weight": "pytorch_model-00003.bin",
+    "model.layers.10.moe_block.experts.6.linear_v.weight": "pytorch_model-00003.bin",
+    "model.layers.10.moe_block.experts.7.linear_v.weight": "pytorch_model-00003.bin",
+    "model.layers.10.attn.k_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.10.attn.o_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.10.attn.q_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.10.attn.v_proj.weight": "pytorch_model-00003.bin",
+    "model.layers.10.pre_attn_norm.scale": "pytorch_model-00003.bin",
+    "model.layers.10.post_attn_norm.scale": "pytorch_model-00003.bin",
+    "model.layers.10.pre_moe_norm.scale": "pytorch_model-00003.bin",
+    "model.layers.10.post_moe_norm.scale": "pytorch_model-00003.bin",
+    "model.layers.10.moe_block.gate.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.0.linear.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.1.linear.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.2.linear.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.3.linear.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.4.linear.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.5.linear.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.6.linear.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.7.linear.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.0.linear_1.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.1.linear_1.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.2.linear_1.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.3.linear_1.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.4.linear_1.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.5.linear_1.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.6.linear_1.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.7.linear_1.weight": "pytorch_model-00003.bin",
+    "model.layers.11.moe_block.experts.0.linear_v.weight": "pytorch_model-00004.bin",
+    "model.layers.11.moe_block.experts.1.linear_v.weight": "pytorch_model-00004.bin",
+    "model.layers.11.moe_block.experts.2.linear_v.weight": "pytorch_model-00004.bin",
+    "model.layers.11.moe_block.experts.3.linear_v.weight": "pytorch_model-00004.bin",
+    "model.layers.11.moe_block.experts.4.linear_v.weight": "pytorch_model-00004.bin",
+    "model.layers.11.moe_block.experts.5.linear_v.weight": "pytorch_model-00004.bin",
+    "model.layers.11.moe_block.experts.6.linear_v.weight": "pytorch_model-00004.bin",
+    "model.layers.11.moe_block.experts.7.linear_v.weight": "pytorch_model-00004.bin",
+    "model.layers.11.attn.k_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.11.attn.o_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.11.attn.q_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.11.attn.v_proj.weight": "pytorch_model-00004.bin",
+    "model.layers.11.pre_attn_norm.scale": "pytorch_model-00004.bin",
+    "model.layers.11.post_attn_norm.scale": "pytorch_model-00004.bin",
+    "model.layers.11.pre_moe_norm.scale": "pytorch_model-00004.bin",
+    "model.layers.11.post_moe_norm.scale": "pytorch_model-00004.bin",
+    "model.layers.11.moe_block.gate.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.0.linear.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.1.linear.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.2.linear.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.3.linear.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.4.linear.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.5.linear.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.6.linear.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.7.linear.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.0.linear_1.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.1.linear_1.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.2.linear_1.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.3.linear_1.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.4.linear_1.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.5.linear_1.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.6.linear_1.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.7.linear_1.weight": "pytorch_model-00004.bin",
+    "model.layers.12.moe_block.experts.0.linear_v.weight": "pytorch_model-00005.bin",
+    "model.layers.12.moe_block.experts.1.linear_v.weight": "pytorch_model-00005.bin",
+    "model.layers.12.moe_block.experts.2.linear_v.weight": "pytorch_model-00005.bin",
+    "model.layers.12.moe_block.experts.3.linear_v.weight": "pytorch_model-00005.bin",
+    "model.layers.12.moe_block.experts.4.linear_v.weight": "pytorch_model-00005.bin",
+    "model.layers.12.moe_block.experts.5.linear_v.weight": "pytorch_model-00005.bin",
+    "model.layers.12.moe_block.experts.6.linear_v.weight": "pytorch_model-00005.bin",
+    "model.layers.12.moe_block.experts.7.linear_v.weight": "pytorch_model-00005.bin",
+    "model.layers.12.attn.k_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.12.attn.o_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.12.attn.q_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.12.attn.v_proj.weight": "pytorch_model-00005.bin",
+    "model.layers.12.pre_attn_norm.scale": "pytorch_model-00005.bin",
+    "model.layers.12.post_attn_norm.scale": "pytorch_model-00005.bin",
+    "model.layers.12.pre_moe_norm.scale": "pytorch_model-00005.bin",
+    "model.layers.12.post_moe_norm.scale": "pytorch_model-00005.bin",
+    "model.layers.12.moe_block.gate.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.0.linear.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.1.linear.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.2.linear.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.3.linear.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.4.linear.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.5.linear.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.6.linear.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.7.linear.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.0.linear_1.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.1.linear_1.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.2.linear_1.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.3.linear_1.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.4.linear_1.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.5.linear_1.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.6.linear_1.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.7.linear_1.weight": "pytorch_model-00005.bin",
+    "model.layers.13.moe_block.experts.0.linear_v.weight": "pytorch_model-00006.bin",
+    "model.layers.13.moe_block.experts.1.linear_v.weight": "pytorch_model-00006.bin",
+    "model.layers.13.moe_block.experts.2.linear_v.weight": "pytorch_model-00006.bin",
+    "model.layers.13.moe_block.experts.3.linear_v.weight": "pytorch_model-00006.bin",
+    "model.layers.13.moe_block.experts.4.linear_v.weight": "pytorch_model-00006.bin",
+    "model.layers.13.moe_block.experts.5.linear_v.weight": "pytorch_model-00006.bin",
+    "model.layers.13.moe_block.experts.6.linear_v.weight": "pytorch_model-00006.bin",
+    "model.layers.13.moe_block.experts.7.linear_v.weight": "pytorch_model-00006.bin",
+    "model.layers.13.attn.k_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.13.attn.o_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.13.attn.q_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.13.attn.v_proj.weight": "pytorch_model-00006.bin",
+    "model.layers.13.pre_attn_norm.scale": "pytorch_model-00006.bin",
+    "model.layers.13.post_attn_norm.scale": "pytorch_model-00006.bin",
+    "model.layers.13.pre_moe_norm.scale": "pytorch_model-00006.bin",
+    "model.layers.13.post_moe_norm.scale": "pytorch_model-00006.bin",
+    "model.layers.13.moe_block.gate.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.0.linear.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.1.linear.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.2.linear.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.3.linear.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.4.linear.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.5.linear.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.6.linear.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.7.linear.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.0.linear_1.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.1.linear_1.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.2.linear_1.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.3.linear_1.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.4.linear_1.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.5.linear_1.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.6.linear_1.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.7.linear_1.weight": "pytorch_model-00006.bin",
+    "model.layers.14.moe_block.experts.0.linear_v.weight": "pytorch_model-00007.bin",
+    "model.layers.14.moe_block.experts.1.linear_v.weight": "pytorch_model-00007.bin",
+    "model.layers.14.moe_block.experts.2.linear_v.weight": "pytorch_model-00007.bin",
+    "model.layers.14.moe_block.experts.3.linear_v.weight": "pytorch_model-00007.bin",
+    "model.layers.14.moe_block.experts.4.linear_v.weight": "pytorch_model-00007.bin",
+    "model.layers.14.moe_block.experts.5.linear_v.weight": "pytorch_model-00007.bin",
+    "model.layers.14.moe_block.experts.6.linear_v.weight": "pytorch_model-00007.bin",
+    "model.layers.14.moe_block.experts.7.linear_v.weight": "pytorch_model-00007.bin",
+    "model.layers.14.attn.k_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.14.attn.o_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.14.attn.q_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.14.attn.v_proj.weight": "pytorch_model-00007.bin",
+    "model.layers.14.pre_attn_norm.scale": "pytorch_model-00007.bin",
+    "model.layers.14.post_attn_norm.scale": "pytorch_model-00007.bin",
+    "model.layers.14.pre_moe_norm.scale": "pytorch_model-00007.bin",
+    "model.layers.14.post_moe_norm.scale": "pytorch_model-00007.bin",
+    "model.layers.14.moe_block.gate.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.0.linear.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.1.linear.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.2.linear.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.3.linear.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.4.linear.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.5.linear.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.6.linear.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.7.linear.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.0.linear_1.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.1.linear_1.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.2.linear_1.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.3.linear_1.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.4.linear_1.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.5.linear_1.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.6.linear_1.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.7.linear_1.weight": "pytorch_model-00007.bin",
+    "model.layers.15.moe_block.experts.0.linear_v.weight": "pytorch_model-00008.bin",
+    "model.layers.15.moe_block.experts.1.linear_v.weight": "pytorch_model-00008.bin",
+    "model.layers.15.moe_block.experts.2.linear_v.weight": "pytorch_model-00008.bin",
+    "model.layers.15.moe_block.experts.3.linear_v.weight": "pytorch_model-00008.bin",
+    "model.layers.15.moe_block.experts.4.linear_v.weight": "pytorch_model-00008.bin",
+    "model.layers.15.moe_block.experts.5.linear_v.weight": "pytorch_model-00008.bin",
+    "model.layers.15.moe_block.experts.6.linear_v.weight": "pytorch_model-00008.bin",
+    "model.layers.15.moe_block.experts.7.linear_v.weight": "pytorch_model-00008.bin",
+    "model.layers.15.attn.k_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.15.attn.o_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.15.attn.q_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.15.attn.v_proj.weight": "pytorch_model-00008.bin",
+    "model.layers.15.pre_attn_norm.scale": "pytorch_model-00008.bin",
+    "model.layers.15.post_attn_norm.scale": "pytorch_model-00008.bin",
+    "model.layers.15.pre_moe_norm.scale": "pytorch_model-00008.bin",
+    "model.layers.15.post_moe_norm.scale": "pytorch_model-00008.bin",
+    "model.layers.15.moe_block.gate.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.0.linear.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.1.linear.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.2.linear.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.3.linear.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.4.linear.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.5.linear.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.6.linear.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.7.linear.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.0.linear_1.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.1.linear_1.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.2.linear_1.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.3.linear_1.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.4.linear_1.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.5.linear_1.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.6.linear_1.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.7.linear_1.weight": "pytorch_model-00008.bin",
+    "model.layers.16.moe_block.experts.0.linear_v.weight": "pytorch_model-00009.bin",
+    "model.layers.16.moe_block.experts.1.linear_v.weight": "pytorch_model-00009.bin",
+    "model.layers.16.moe_block.experts.2.linear_v.weight": "pytorch_model-00009.bin",
+    "model.layers.16.moe_block.experts.3.linear_v.weight": "pytorch_model-00009.bin",
+    "model.layers.16.moe_block.experts.4.linear_v.weight": "pytorch_model-00009.bin",
+    "model.layers.16.moe_block.experts.5.linear_v.weight": "pytorch_model-00009.bin",
+    "model.layers.16.moe_block.experts.6.linear_v.weight": "pytorch_model-00009.bin",
+    "model.layers.16.moe_block.experts.7.linear_v.weight": "pytorch_model-00009.bin",
+    "model.layers.16.attn.k_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.16.attn.o_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.16.attn.q_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.16.attn.v_proj.weight": "pytorch_model-00009.bin",
+    "model.layers.16.pre_attn_norm.scale": "pytorch_model-00009.bin",
+    "model.layers.16.post_attn_norm.scale": "pytorch_model-00009.bin",
+    "model.layers.16.pre_moe_norm.scale": "pytorch_model-00009.bin",
+    "model.layers.16.post_moe_norm.scale": "pytorch_model-00009.bin",
+    "model.layers.16.moe_block.gate.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.0.linear.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.1.linear.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.2.linear.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.3.linear.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.4.linear.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.5.linear.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.6.linear.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.7.linear.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.0.linear_1.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.1.linear_1.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.2.linear_1.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.3.linear_1.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.4.linear_1.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.5.linear_1.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.6.linear_1.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.7.linear_1.weight": "pytorch_model-00009.bin",
+    "model.layers.17.moe_block.experts.0.linear_v.weight": "pytorch_model-00010.bin",
+    "model.layers.17.moe_block.experts.1.linear_v.weight": "pytorch_model-00010.bin",
+    "model.layers.17.moe_block.experts.2.linear_v.weight": "pytorch_model-00010.bin",
+    "model.layers.17.moe_block.experts.3.linear_v.weight": "pytorch_model-00010.bin",
+    "model.layers.17.moe_block.experts.4.linear_v.weight": "pytorch_model-00010.bin",
+    "model.layers.17.moe_block.experts.5.linear_v.weight": "pytorch_model-00010.bin",
+    "model.layers.17.moe_block.experts.6.linear_v.weight": "pytorch_model-00010.bin",
+    "model.layers.17.moe_block.experts.7.linear_v.weight": "pytorch_model-00010.bin",
+    "model.layers.17.attn.k_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.17.attn.o_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.17.attn.q_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.17.attn.v_proj.weight": "pytorch_model-00010.bin",
+    "model.layers.17.pre_attn_norm.scale": "pytorch_model-00010.bin",
+    "model.layers.17.post_attn_norm.scale": "pytorch_model-00010.bin",
+    "model.layers.17.pre_moe_norm.scale": "pytorch_model-00010.bin",
+    "model.layers.17.post_moe_norm.scale": "pytorch_model-00010.bin",
+    "model.layers.17.moe_block.gate.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.0.linear.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.1.linear.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.2.linear.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.3.linear.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.4.linear.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.5.linear.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.6.linear.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.7.linear.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.0.linear_1.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.1.linear_1.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.2.linear_1.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.3.linear_1.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.4.linear_1.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.5.linear_1.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.6.linear_1.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.7.linear_1.weight": "pytorch_model-00010.bin",
+    "model.layers.18.moe_block.experts.0.linear_v.weight": "pytorch_model-00011.bin",
+    "model.layers.18.moe_block.experts.1.linear_v.weight": "pytorch_model-00011.bin",
+    "model.layers.18.moe_block.experts.2.linear_v.weight": "pytorch_model-00011.bin",
+    "model.layers.18.moe_block.experts.3.linear_v.weight": "pytorch_model-00011.bin",
+    "model.layers.18.moe_block.experts.4.linear_v.weight": "pytorch_model-00011.bin",
+    "model.layers.18.moe_block.experts.5.linear_v.weight": "pytorch_model-00011.bin",
+    "model.layers.18.moe_block.experts.6.linear_v.weight": "pytorch_model-00011.bin",
+    "model.layers.18.moe_block.experts.7.linear_v.weight": "pytorch_model-00011.bin",
+    "model.layers.18.attn.k_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.18.attn.o_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.18.attn.q_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.18.attn.v_proj.weight": "pytorch_model-00011.bin",
+    "model.layers.18.pre_attn_norm.scale": "pytorch_model-00011.bin",
+    "model.layers.18.post_attn_norm.scale": "pytorch_model-00011.bin",
+    "model.layers.18.pre_moe_norm.scale": "pytorch_model-00011.bin",
+    "model.layers.18.post_moe_norm.scale": "pytorch_model-00011.bin",
+    "model.layers.18.moe_block.gate.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.0.linear.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.1.linear.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.2.linear.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.3.linear.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.4.linear.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.5.linear.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.6.linear.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.7.linear.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.0.linear_1.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.1.linear_1.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.2.linear_1.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.3.linear_1.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.4.linear_1.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.5.linear_1.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.6.linear_1.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.7.linear_1.weight": "pytorch_model-00011.bin",
+    "model.layers.19.moe_block.experts.0.linear_v.weight": "pytorch_model-00012.bin",
+    "model.layers.19.moe_block.experts.1.linear_v.weight": "pytorch_model-00012.bin",
+    "model.layers.19.moe_block.experts.2.linear_v.weight": "pytorch_model-00012.bin",
+    "model.layers.19.moe_block.experts.3.linear_v.weight": "pytorch_model-00012.bin",
+    "model.layers.19.moe_block.experts.4.linear_v.weight": "pytorch_model-00012.bin",
+    "model.layers.19.moe_block.experts.5.linear_v.weight": "pytorch_model-00012.bin",
+    "model.layers.19.moe_block.experts.6.linear_v.weight": "pytorch_model-00012.bin",
+    "model.layers.19.moe_block.experts.7.linear_v.weight": "pytorch_model-00012.bin",
+    "model.layers.19.attn.k_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.19.attn.o_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.19.attn.q_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.19.attn.v_proj.weight": "pytorch_model-00012.bin",
+    "model.layers.19.pre_attn_norm.scale": "pytorch_model-00012.bin",
+    "model.layers.19.post_attn_norm.scale": "pytorch_model-00012.bin",
+    "model.layers.19.pre_moe_norm.scale": "pytorch_model-00012.bin",
+    "model.layers.19.post_moe_norm.scale": "pytorch_model-00012.bin",
+    "model.layers.19.moe_block.gate.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.0.linear.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.1.linear.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.2.linear.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.3.linear.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.4.linear.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.5.linear.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.6.linear.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.7.linear.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.0.linear_1.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.1.linear_1.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.2.linear_1.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.3.linear_1.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.4.linear_1.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.5.linear_1.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.6.linear_1.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.7.linear_1.weight": "pytorch_model-00012.bin",
+    "model.layers.2.moe_block.experts.0.linear_v.weight": "pytorch_model-00013.bin",
+    "model.layers.2.moe_block.experts.1.linear_v.weight": "pytorch_model-00013.bin",
+    "model.layers.2.moe_block.experts.2.linear_v.weight": "pytorch_model-00013.bin",
+    "model.layers.2.moe_block.experts.3.linear_v.weight": "pytorch_model-00013.bin",
+    "model.layers.2.moe_block.experts.4.linear_v.weight": "pytorch_model-00013.bin",
+    "model.layers.2.moe_block.experts.5.linear_v.weight": "pytorch_model-00013.bin",
+    "model.layers.2.moe_block.experts.6.linear_v.weight": "pytorch_model-00013.bin",
+    "model.layers.2.moe_block.experts.7.linear_v.weight": "pytorch_model-00013.bin",
+    "model.layers.2.attn.k_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.2.attn.o_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.2.attn.q_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.2.attn.v_proj.weight": "pytorch_model-00013.bin",
+    "model.layers.2.pre_attn_norm.scale": "pytorch_model-00013.bin",
+    "model.layers.2.post_attn_norm.scale": "pytorch_model-00013.bin",
+    "model.layers.2.pre_moe_norm.scale": "pytorch_model-00013.bin",
+    "model.layers.2.post_moe_norm.scale": "pytorch_model-00013.bin",
+    "model.layers.2.moe_block.gate.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.0.linear.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.1.linear.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.2.linear.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.3.linear.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.4.linear.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.5.linear.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.6.linear.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.7.linear.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.0.linear_1.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.1.linear_1.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.2.linear_1.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.3.linear_1.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.4.linear_1.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.5.linear_1.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.6.linear_1.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.7.linear_1.weight": "pytorch_model-00013.bin",
+    "model.layers.20.moe_block.experts.0.linear_v.weight": "pytorch_model-00014.bin",
+    "model.layers.20.moe_block.experts.1.linear_v.weight": "pytorch_model-00014.bin",
+    "model.layers.20.moe_block.experts.2.linear_v.weight": "pytorch_model-00014.bin",
+    "model.layers.20.moe_block.experts.3.linear_v.weight": "pytorch_model-00014.bin",
+    "model.layers.20.moe_block.experts.4.linear_v.weight": "pytorch_model-00014.bin",
+    "model.layers.20.moe_block.experts.5.linear_v.weight": "pytorch_model-00014.bin",
+    "model.layers.20.moe_block.experts.6.linear_v.weight": "pytorch_model-00014.bin",
+    "model.layers.20.moe_block.experts.7.linear_v.weight": "pytorch_model-00014.bin",
+    "model.layers.20.attn.k_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.20.attn.o_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.20.attn.q_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.20.attn.v_proj.weight": "pytorch_model-00014.bin",
+    "model.layers.20.pre_attn_norm.scale": "pytorch_model-00014.bin",
+    "model.layers.20.post_attn_norm.scale": "pytorch_model-00014.bin",
+    "model.layers.20.pre_moe_norm.scale": "pytorch_model-00014.bin",
+    "model.layers.20.post_moe_norm.scale": "pytorch_model-00014.bin",
+    "model.layers.20.moe_block.gate.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.0.linear.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.1.linear.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.2.linear.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.3.linear.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.4.linear.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.5.linear.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.6.linear.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.7.linear.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.0.linear_1.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.1.linear_1.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.2.linear_1.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.3.linear_1.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.4.linear_1.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.5.linear_1.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.6.linear_1.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.7.linear_1.weight": "pytorch_model-00014.bin",
+    "model.layers.21.moe_block.experts.0.linear_v.weight": "pytorch_model-00015.bin",
+    "model.layers.21.moe_block.experts.1.linear_v.weight": "pytorch_model-00015.bin",
+    "model.layers.21.moe_block.experts.2.linear_v.weight": "pytorch_model-00015.bin",
+    "model.layers.21.moe_block.experts.3.linear_v.weight": "pytorch_model-00015.bin",
+    "model.layers.21.moe_block.experts.4.linear_v.weight": "pytorch_model-00015.bin",
+    "model.layers.21.moe_block.experts.5.linear_v.weight": "pytorch_model-00015.bin",
+    "model.layers.21.moe_block.experts.6.linear_v.weight": "pytorch_model-00015.bin",
+    "model.layers.21.moe_block.experts.7.linear_v.weight": "pytorch_model-00015.bin",
+    "model.layers.21.attn.k_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.21.attn.o_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.21.attn.q_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.21.attn.v_proj.weight": "pytorch_model-00015.bin",
+    "model.layers.21.pre_attn_norm.scale": "pytorch_model-00015.bin",
+    "model.layers.21.post_attn_norm.scale": "pytorch_model-00015.bin",
+    "model.layers.21.pre_moe_norm.scale": "pytorch_model-00015.bin",
+    "model.layers.21.post_moe_norm.scale": "pytorch_model-00015.bin",
+    "model.layers.21.moe_block.gate.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.0.linear.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.1.linear.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.2.linear.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.3.linear.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.4.linear.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.5.linear.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.6.linear.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.7.linear.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.0.linear_1.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.1.linear_1.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.2.linear_1.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.3.linear_1.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.4.linear_1.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.5.linear_1.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.6.linear_1.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.7.linear_1.weight": "pytorch_model-00015.bin",
+    "model.layers.22.moe_block.experts.0.linear_v.weight": "pytorch_model-00016.bin",
+    "model.layers.22.moe_block.experts.1.linear_v.weight": "pytorch_model-00016.bin",
+    "model.layers.22.moe_block.experts.2.linear_v.weight": "pytorch_model-00016.bin",
+    "model.layers.22.moe_block.experts.3.linear_v.weight": "pytorch_model-00016.bin",
+    "model.layers.22.moe_block.experts.4.linear_v.weight": "pytorch_model-00016.bin",
+    "model.layers.22.moe_block.experts.5.linear_v.weight": "pytorch_model-00016.bin",
+    "model.layers.22.moe_block.experts.6.linear_v.weight": "pytorch_model-00016.bin",
+    "model.layers.22.moe_block.experts.7.linear_v.weight": "pytorch_model-00016.bin",
+    "model.layers.22.attn.k_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.22.attn.o_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.22.attn.q_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.22.attn.v_proj.weight": "pytorch_model-00016.bin",
+    "model.layers.22.pre_attn_norm.scale": "pytorch_model-00016.bin",
+    "model.layers.22.post_attn_norm.scale": "pytorch_model-00016.bin",
+    "model.layers.22.pre_moe_norm.scale": "pytorch_model-00016.bin",
+    "model.layers.22.post_moe_norm.scale": "pytorch_model-00016.bin",
+    "model.layers.22.moe_block.gate.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.0.linear.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.1.linear.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.2.linear.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.3.linear.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.4.linear.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.5.linear.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.6.linear.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.7.linear.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.0.linear_1.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.1.linear_1.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.2.linear_1.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.3.linear_1.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.4.linear_1.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.5.linear_1.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.6.linear_1.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.7.linear_1.weight": "pytorch_model-00016.bin",
+    "model.layers.23.moe_block.experts.0.linear_v.weight": "pytorch_model-00017.bin",
+    "model.layers.23.moe_block.experts.1.linear_v.weight": "pytorch_model-00017.bin",
+    "model.layers.23.moe_block.experts.2.linear_v.weight": "pytorch_model-00017.bin",
+    "model.layers.23.moe_block.experts.3.linear_v.weight": "pytorch_model-00017.bin",
+    "model.layers.23.moe_block.experts.4.linear_v.weight": "pytorch_model-00017.bin",
+    "model.layers.23.moe_block.experts.5.linear_v.weight": "pytorch_model-00017.bin",
+    "model.layers.23.moe_block.experts.6.linear_v.weight": "pytorch_model-00017.bin",
+    "model.layers.23.moe_block.experts.7.linear_v.weight": "pytorch_model-00017.bin",
+    "model.layers.23.attn.k_proj.weight": "pytorch_model-00017.bin",
+    "model.layers.23.attn.o_proj.weight": "pytorch_model-00017.bin",
+    "model.layers.23.attn.q_proj.weight": "pytorch_model-00017.bin",
+    "model.layers.23.attn.v_proj.weight": "pytorch_model-00017.bin",
+    "model.layers.23.pre_attn_norm.scale": "pytorch_model-00017.bin",
+    "model.layers.23.post_attn_norm.scale": "pytorch_model-00017.bin",
+    "model.layers.23.pre_moe_norm.scale": "pytorch_model-00017.bin",
+    "model.layers.23.post_moe_norm.scale": "pytorch_model-00017.bin",
+    "model.layers.23.moe_block.gate.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.0.linear.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.1.linear.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.2.linear.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.3.linear.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.4.linear.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.5.linear.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.6.linear.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.7.linear.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.0.linear_1.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.1.linear_1.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.2.linear_1.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.3.linear_1.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.4.linear_1.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.5.linear_1.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.6.linear_1.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.7.linear_1.weight": "pytorch_model-00017.bin",
+    "model.layers.24.moe_block.experts.0.linear_v.weight": "pytorch_model-00018.bin",
+    "model.layers.24.moe_block.experts.1.linear_v.weight": "pytorch_model-00018.bin",
+    "model.layers.24.moe_block.experts.2.linear_v.weight": "pytorch_model-00018.bin",
+    "model.layers.24.moe_block.experts.3.linear_v.weight": "pytorch_model-00018.bin",
+    "model.layers.24.moe_block.experts.4.linear_v.weight": "pytorch_model-00018.bin",
+    "model.layers.24.moe_block.experts.5.linear_v.weight": "pytorch_model-00018.bin",
+    "model.layers.24.moe_block.experts.6.linear_v.weight": "pytorch_model-00018.bin",
+    "model.layers.24.moe_block.experts.7.linear_v.weight": "pytorch_model-00018.bin",
+    "model.layers.24.attn.k_proj.weight": "pytorch_model-00018.bin",
+    "model.layers.24.attn.o_proj.weight": "pytorch_model-00018.bin",
+    "model.layers.24.attn.q_proj.weight": "pytorch_model-00018.bin",
+    "model.layers.24.attn.v_proj.weight": "pytorch_model-00018.bin",
+    "model.layers.24.pre_attn_norm.scale": "pytorch_model-00018.bin",
+    "model.layers.24.post_attn_norm.scale": "pytorch_model-00018.bin",
+    "model.layers.24.pre_moe_norm.scale": "pytorch_model-00018.bin",
+    "model.layers.24.post_moe_norm.scale": "pytorch_model-00018.bin",
+    "model.layers.24.moe_block.gate.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.0.linear.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.1.linear.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.2.linear.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.3.linear.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.4.linear.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.5.linear.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.6.linear.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.7.linear.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.0.linear_1.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.1.linear_1.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.2.linear_1.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.3.linear_1.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.4.linear_1.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.5.linear_1.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.6.linear_1.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.7.linear_1.weight": "pytorch_model-00018.bin",
+    "model.layers.25.moe_block.experts.0.linear_v.weight": "pytorch_model-00019.bin",
+    "model.layers.25.moe_block.experts.1.linear_v.weight": "pytorch_model-00019.bin",
+    "model.layers.25.moe_block.experts.2.linear_v.weight": "pytorch_model-00019.bin",
+    "model.layers.25.moe_block.experts.3.linear_v.weight": "pytorch_model-00019.bin",
+    "model.layers.25.moe_block.experts.4.linear_v.weight": "pytorch_model-00019.bin",
+    "model.layers.25.moe_block.experts.5.linear_v.weight": "pytorch_model-00019.bin",
+    "model.layers.25.moe_block.experts.6.linear_v.weight": "pytorch_model-00019.bin",
+    "model.layers.25.moe_block.experts.7.linear_v.weight": "pytorch_model-00019.bin",
+    "model.layers.25.attn.k_proj.weight": "pytorch_model-00019.bin",
+    "model.layers.25.attn.o_proj.weight": "pytorch_model-00019.bin",
+    "model.layers.25.attn.q_proj.weight": "pytorch_model-00019.bin",
+    "model.layers.25.attn.v_proj.weight": "pytorch_model-00019.bin",
+    "model.layers.25.pre_attn_norm.scale": "pytorch_model-00019.bin",
+    "model.layers.25.post_attn_norm.scale": "pytorch_model-00019.bin",
+    "model.layers.25.pre_moe_norm.scale": "pytorch_model-00019.bin",
+    "model.layers.25.post_moe_norm.scale": "pytorch_model-00019.bin",
+    "model.layers.25.moe_block.gate.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.0.linear.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.1.linear.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.2.linear.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.3.linear.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.4.linear.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.5.linear.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.6.linear.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.7.linear.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.0.linear_1.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.1.linear_1.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.2.linear_1.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.3.linear_1.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.4.linear_1.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.5.linear_1.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.6.linear_1.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.7.linear_1.weight": "pytorch_model-00019.bin",
+    "model.layers.26.moe_block.experts.0.linear_v.weight": "pytorch_model-00020.bin",
+    "model.layers.26.moe_block.experts.1.linear_v.weight": "pytorch_model-00020.bin",
+    "model.layers.26.moe_block.experts.2.linear_v.weight": "pytorch_model-00020.bin",
+    "model.layers.26.moe_block.experts.3.linear_v.weight": "pytorch_model-00020.bin",
+    "model.layers.26.moe_block.experts.4.linear_v.weight": "pytorch_model-00020.bin",
+    "model.layers.26.moe_block.experts.5.linear_v.weight": "pytorch_model-00020.bin",
+    "model.layers.26.moe_block.experts.6.linear_v.weight": "pytorch_model-00020.bin",
+    "model.layers.26.moe_block.experts.7.linear_v.weight": "pytorch_model-00020.bin",
+    "model.layers.26.attn.k_proj.weight": "pytorch_model-00020.bin",
+    "model.layers.26.attn.o_proj.weight": "pytorch_model-00020.bin",
+    "model.layers.26.attn.q_proj.weight": "pytorch_model-00020.bin",
+    "model.layers.26.attn.v_proj.weight": "pytorch_model-00020.bin",
+    "model.layers.26.pre_attn_norm.scale": "pytorch_model-00020.bin",
+    "model.layers.26.post_attn_norm.scale": "pytorch_model-00020.bin",
+    "model.layers.26.pre_moe_norm.scale": "pytorch_model-00020.bin",
+    "model.layers.26.post_moe_norm.scale": "pytorch_model-00020.bin",
+    "model.layers.26.moe_block.gate.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.0.linear.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.1.linear.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.2.linear.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.3.linear.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.4.linear.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.5.linear.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.6.linear.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.7.linear.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.0.linear_1.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.1.linear_1.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.2.linear_1.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.3.linear_1.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.4.linear_1.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.5.linear_1.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.6.linear_1.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.7.linear_1.weight": "pytorch_model-00020.bin",
+    "model.layers.27.moe_block.experts.0.linear_v.weight": "pytorch_model-00021.bin",
+    "model.layers.27.moe_block.experts.1.linear_v.weight": "pytorch_model-00021.bin",
+    "model.layers.27.moe_block.experts.2.linear_v.weight": "pytorch_model-00021.bin",
+    "model.layers.27.moe_block.experts.3.linear_v.weight": "pytorch_model-00021.bin",
+    "model.layers.27.moe_block.experts.4.linear_v.weight": "pytorch_model-00021.bin",
+    "model.layers.27.moe_block.experts.5.linear_v.weight": "pytorch_model-00021.bin",
+    "model.layers.27.moe_block.experts.6.linear_v.weight": "pytorch_model-00021.bin",
+    "model.layers.27.moe_block.experts.7.linear_v.weight": "pytorch_model-00021.bin",
+    "model.layers.27.attn.k_proj.weight": "pytorch_model-00021.bin",
+    "model.layers.27.attn.o_proj.weight": "pytorch_model-00021.bin",
+    "model.layers.27.attn.q_proj.weight": "pytorch_model-00021.bin",
+    "model.layers.27.attn.v_proj.weight": "pytorch_model-00021.bin",
+    "model.layers.27.pre_attn_norm.scale": "pytorch_model-00021.bin",
+    "model.layers.27.post_attn_norm.scale": "pytorch_model-00021.bin",
+    "model.layers.27.pre_moe_norm.scale": "pytorch_model-00021.bin",
+    "model.layers.27.post_moe_norm.scale": "pytorch_model-00021.bin",
+    "model.layers.27.moe_block.gate.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.0.linear.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.1.linear.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.2.linear.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.3.linear.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.4.linear.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.5.linear.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.6.linear.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.7.linear.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.0.linear_1.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.1.linear_1.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.2.linear_1.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.3.linear_1.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.4.linear_1.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.5.linear_1.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.6.linear_1.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.7.linear_1.weight": "pytorch_model-00021.bin",
+    "model.layers.28.moe_block.experts.0.linear_v.weight": "pytorch_model-00022.bin",
+    "model.layers.28.moe_block.experts.1.linear_v.weight": "pytorch_model-00022.bin",
+    "model.layers.28.moe_block.experts.2.linear_v.weight": "pytorch_model-00022.bin",
+    "model.layers.28.moe_block.experts.3.linear_v.weight": "pytorch_model-00022.bin",
+    "model.layers.28.moe_block.experts.4.linear_v.weight": "pytorch_model-00022.bin",
+    "model.layers.28.moe_block.experts.5.linear_v.weight": "pytorch_model-00022.bin",
+    "model.layers.28.moe_block.experts.6.linear_v.weight": "pytorch_model-00022.bin",
+    "model.layers.28.moe_block.experts.7.linear_v.weight": "pytorch_model-00022.bin",
+    "model.layers.28.attn.k_proj.weight": "pytorch_model-00022.bin",
+    "model.layers.28.attn.o_proj.weight": "pytorch_model-00022.bin",
+    "model.layers.28.attn.q_proj.weight": "pytorch_model-00022.bin",
+    "model.layers.28.attn.v_proj.weight": "pytorch_model-00022.bin",
+    "model.layers.28.pre_attn_norm.scale": "pytorch_model-00022.bin",
+    "model.layers.28.post_attn_norm.scale": "pytorch_model-00022.bin",
+    "model.layers.28.pre_moe_norm.scale": "pytorch_model-00022.bin",
+    "model.layers.28.post_moe_norm.scale": "pytorch_model-00022.bin",
+    "model.layers.28.moe_block.gate.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.0.linear.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.1.linear.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.2.linear.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.3.linear.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.4.linear.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.5.linear.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.6.linear.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.7.linear.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.0.linear_1.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.1.linear_1.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.2.linear_1.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.3.linear_1.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.4.linear_1.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.5.linear_1.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.6.linear_1.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.7.linear_1.weight": "pytorch_model-00022.bin",
+    "model.layers.29.moe_block.experts.0.linear_v.weight": "pytorch_model-00023.bin",
+    "model.layers.29.moe_block.experts.1.linear_v.weight": "pytorch_model-00023.bin",
+    "model.layers.29.moe_block.experts.2.linear_v.weight": "pytorch_model-00023.bin",
+    "model.layers.29.moe_block.experts.3.linear_v.weight": "pytorch_model-00023.bin",
+    "model.layers.29.moe_block.experts.4.linear_v.weight": "pytorch_model-00023.bin",
+    "model.layers.29.moe_block.experts.5.linear_v.weight": "pytorch_model-00023.bin",
+    "model.layers.29.moe_block.experts.6.linear_v.weight": "pytorch_model-00023.bin",
+    "model.layers.29.moe_block.experts.7.linear_v.weight": "pytorch_model-00023.bin",
+    "model.layers.29.attn.k_proj.weight": "pytorch_model-00023.bin",
+    "model.layers.29.attn.o_proj.weight": "pytorch_model-00023.bin",
+    "model.layers.29.attn.q_proj.weight": "pytorch_model-00023.bin",
+    "model.layers.29.attn.v_proj.weight": "pytorch_model-00023.bin",
+    "model.layers.29.pre_attn_norm.scale": "pytorch_model-00023.bin",
+    "model.layers.29.post_attn_norm.scale": "pytorch_model-00023.bin",
+    "model.layers.29.pre_moe_norm.scale": "pytorch_model-00023.bin",
+    "model.layers.29.post_moe_norm.scale": "pytorch_model-00023.bin",
+    "model.layers.29.moe_block.gate.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.0.linear.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.1.linear.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.2.linear.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.3.linear.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.4.linear.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.5.linear.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.6.linear.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.7.linear.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.0.linear_1.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.1.linear_1.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.2.linear_1.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.3.linear_1.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.4.linear_1.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.5.linear_1.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.6.linear_1.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.7.linear_1.weight": "pytorch_model-00023.bin",
+    "model.layers.3.moe_block.experts.0.linear_v.weight": "pytorch_model-00024.bin",
+    "model.layers.3.moe_block.experts.1.linear_v.weight": "pytorch_model-00024.bin",
+    "model.layers.3.moe_block.experts.2.linear_v.weight": "pytorch_model-00024.bin",
+    "model.layers.3.moe_block.experts.3.linear_v.weight": "pytorch_model-00024.bin",
+    "model.layers.3.moe_block.experts.4.linear_v.weight": "pytorch_model-00024.bin",
+    "model.layers.3.moe_block.experts.5.linear_v.weight": "pytorch_model-00024.bin",
+    "model.layers.3.moe_block.experts.6.linear_v.weight": "pytorch_model-00024.bin",
+    "model.layers.3.moe_block.experts.7.linear_v.weight": "pytorch_model-00024.bin",
+    "model.layers.3.attn.k_proj.weight": "pytorch_model-00024.bin",
+    "model.layers.3.attn.o_proj.weight": "pytorch_model-00024.bin",
+    "model.layers.3.attn.q_proj.weight": "pytorch_model-00024.bin",
+    "model.layers.3.attn.v_proj.weight": "pytorch_model-00024.bin",
+    "model.layers.3.pre_attn_norm.scale": "pytorch_model-00024.bin",
+    "model.layers.3.post_attn_norm.scale": "pytorch_model-00024.bin",
+    "model.layers.3.pre_moe_norm.scale": "pytorch_model-00024.bin",
+    "model.layers.3.post_moe_norm.scale": "pytorch_model-00024.bin",
+    "model.layers.3.moe_block.gate.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.0.linear.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.1.linear.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.2.linear.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.3.linear.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.4.linear.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.5.linear.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.6.linear.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.7.linear.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.0.linear_1.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.1.linear_1.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.2.linear_1.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.3.linear_1.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.4.linear_1.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.5.linear_1.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.6.linear_1.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.7.linear_1.weight": "pytorch_model-00024.bin",
+    "model.layers.30.moe_block.experts.0.linear_v.weight": "pytorch_model-00025.bin",
+    "model.layers.30.moe_block.experts.1.linear_v.weight": "pytorch_model-00025.bin",
+    "model.layers.30.moe_block.experts.2.linear_v.weight": "pytorch_model-00025.bin",
+    "model.layers.30.moe_block.experts.3.linear_v.weight": "pytorch_model-00025.bin",
+    "model.layers.30.moe_block.experts.4.linear_v.weight": "pytorch_model-00025.bin",
+    "model.layers.30.moe_block.experts.5.linear_v.weight": "pytorch_model-00025.bin",
+    "model.layers.30.moe_block.experts.6.linear_v.weight": "pytorch_model-00025.bin",
+    "model.layers.30.moe_block.experts.7.linear_v.weight": "pytorch_model-00025.bin",
+    "model.layers.30.attn.k_proj.weight": "pytorch_model-00025.bin",
+    "model.layers.30.attn.o_proj.weight": "pytorch_model-00025.bin",
+    "model.layers.30.attn.q_proj.weight": "pytorch_model-00025.bin",
+    "model.layers.30.attn.v_proj.weight": "pytorch_model-00025.bin",
+    "model.layers.30.pre_attn_norm.scale": "pytorch_model-00025.bin",
+    "model.layers.30.post_attn_norm.scale": "pytorch_model-00025.bin",
+    "model.layers.30.pre_moe_norm.scale": "pytorch_model-00025.bin",
+    "model.layers.30.post_moe_norm.scale": "pytorch_model-00025.bin",
+    "model.layers.30.moe_block.gate.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.0.linear.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.1.linear.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.2.linear.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.3.linear.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.4.linear.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.5.linear.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.6.linear.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.7.linear.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.0.linear_1.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.1.linear_1.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.2.linear_1.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.3.linear_1.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.4.linear_1.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.5.linear_1.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.6.linear_1.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.7.linear_1.weight": "pytorch_model-00025.bin",
+    "model.layers.31.moe_block.experts.0.linear_v.weight": "pytorch_model-00026.bin",
+    "model.layers.31.moe_block.experts.1.linear_v.weight": "pytorch_model-00026.bin",
+    "model.layers.31.moe_block.experts.2.linear_v.weight": "pytorch_model-00026.bin",
+    "model.layers.31.moe_block.experts.3.linear_v.weight": "pytorch_model-00026.bin",
+    "model.layers.31.moe_block.experts.4.linear_v.weight": "pytorch_model-00026.bin",
+    "model.layers.31.moe_block.experts.5.linear_v.weight": "pytorch_model-00026.bin",
+    "model.layers.31.moe_block.experts.6.linear_v.weight": "pytorch_model-00026.bin",
+    "model.layers.31.moe_block.experts.7.linear_v.weight": "pytorch_model-00026.bin",
+    "model.layers.31.attn.k_proj.weight": "pytorch_model-00026.bin",
+    "model.layers.31.attn.o_proj.weight": "pytorch_model-00026.bin",
+    "model.layers.31.attn.q_proj.weight": "pytorch_model-00026.bin",
+    "model.layers.31.attn.v_proj.weight": "pytorch_model-00026.bin",
+    "model.layers.31.pre_attn_norm.scale": "pytorch_model-00026.bin",
+    "model.layers.31.post_attn_norm.scale": "pytorch_model-00026.bin",
+    "model.layers.31.pre_moe_norm.scale": "pytorch_model-00026.bin",
+    "model.layers.31.post_moe_norm.scale": "pytorch_model-00026.bin",
+    "model.layers.31.moe_block.gate.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.0.linear.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.1.linear.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.2.linear.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.3.linear.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.4.linear.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.5.linear.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.6.linear.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.7.linear.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.0.linear_1.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.1.linear_1.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.2.linear_1.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.3.linear_1.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.4.linear_1.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.5.linear_1.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.6.linear_1.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.7.linear_1.weight": "pytorch_model-00026.bin",
+    "model.layers.32.moe_block.experts.0.linear_v.weight": "pytorch_model-00027.bin",
+    "model.layers.32.moe_block.experts.1.linear_v.weight": "pytorch_model-00027.bin",
+    "model.layers.32.moe_block.experts.2.linear_v.weight": "pytorch_model-00027.bin",
+    "model.layers.32.moe_block.experts.3.linear_v.weight": "pytorch_model-00027.bin",
+    "model.layers.32.moe_block.experts.4.linear_v.weight": "pytorch_model-00027.bin",
+    "model.layers.32.moe_block.experts.5.linear_v.weight": "pytorch_model-00027.bin",
+    "model.layers.32.moe_block.experts.6.linear_v.weight": "pytorch_model-00027.bin",
+    "model.layers.32.moe_block.experts.7.linear_v.weight": "pytorch_model-00027.bin",
+    "model.layers.32.attn.k_proj.weight": "pytorch_model-00027.bin",
+    "model.layers.32.attn.o_proj.weight": "pytorch_model-00027.bin",
+    "model.layers.32.attn.q_proj.weight": "pytorch_model-00027.bin",
+    "model.layers.32.attn.v_proj.weight": "pytorch_model-00027.bin",
+    "model.layers.32.pre_attn_norm.scale": "pytorch_model-00027.bin",
+    "model.layers.32.post_attn_norm.scale": "pytorch_model-00027.bin",
+    "model.layers.32.pre_moe_norm.scale": "pytorch_model-00027.bin",
+    "model.layers.32.post_moe_norm.scale": "pytorch_model-00027.bin",
+    "model.layers.32.moe_block.gate.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.0.linear.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.1.linear.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.2.linear.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.3.linear.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.4.linear.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.5.linear.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.6.linear.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.7.linear.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.0.linear_1.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.1.linear_1.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.2.linear_1.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.3.linear_1.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.4.linear_1.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.5.linear_1.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.6.linear_1.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.7.linear_1.weight": "pytorch_model-00027.bin",
+    "model.layers.33.moe_block.experts.0.linear_v.weight": "pytorch_model-00028.bin",
+    "model.layers.33.moe_block.experts.1.linear_v.weight": "pytorch_model-00028.bin",
+    "model.layers.33.moe_block.experts.2.linear_v.weight": "pytorch_model-00028.bin",
+    "model.layers.33.moe_block.experts.3.linear_v.weight": "pytorch_model-00028.bin",
+    "model.layers.33.moe_block.experts.4.linear_v.weight": "pytorch_model-00028.bin",
+    "model.layers.33.moe_block.experts.5.linear_v.weight": "pytorch_model-00028.bin",
+    "model.layers.33.moe_block.experts.6.linear_v.weight": "pytorch_model-00028.bin",
+    "model.layers.33.moe_block.experts.7.linear_v.weight": "pytorch_model-00028.bin",
+    "model.layers.33.attn.k_proj.weight": "pytorch_model-00028.bin",
+    "model.layers.33.attn.o_proj.weight": "pytorch_model-00028.bin",
+    "model.layers.33.attn.q_proj.weight": "pytorch_model-00028.bin",
+    "model.layers.33.attn.v_proj.weight": "pytorch_model-00028.bin",
+    "model.layers.33.pre_attn_norm.scale": "pytorch_model-00028.bin",
+    "model.layers.33.post_attn_norm.scale": "pytorch_model-00028.bin",
+    "model.layers.33.pre_moe_norm.scale": "pytorch_model-00028.bin",
+    "model.layers.33.post_moe_norm.scale": "pytorch_model-00028.bin",
+    "model.layers.33.moe_block.gate.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.0.linear.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.1.linear.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.2.linear.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.3.linear.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.4.linear.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.5.linear.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.6.linear.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.7.linear.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.0.linear_1.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.1.linear_1.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.2.linear_1.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.3.linear_1.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.4.linear_1.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.5.linear_1.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.6.linear_1.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.7.linear_1.weight": "pytorch_model-00028.bin",
+    "model.layers.34.moe_block.experts.0.linear_v.weight": "pytorch_model-00029.bin",
+    "model.layers.34.moe_block.experts.1.linear_v.weight": "pytorch_model-00029.bin",
+    "model.layers.34.moe_block.experts.2.linear_v.weight": "pytorch_model-00029.bin",
+    "model.layers.34.moe_block.experts.3.linear_v.weight": "pytorch_model-00029.bin",
+    "model.layers.34.moe_block.experts.4.linear_v.weight": "pytorch_model-00029.bin",
+    "model.layers.34.moe_block.experts.5.linear_v.weight": "pytorch_model-00029.bin",
+    "model.layers.34.moe_block.experts.6.linear_v.weight": "pytorch_model-00029.bin",
+    "model.layers.34.moe_block.experts.7.linear_v.weight": "pytorch_model-00029.bin",
+    "model.layers.34.attn.k_proj.weight": "pytorch_model-00029.bin",
+    "model.layers.34.attn.o_proj.weight": "pytorch_model-00029.bin",
+    "model.layers.34.attn.q_proj.weight": "pytorch_model-00029.bin",
+    "model.layers.34.attn.v_proj.weight": "pytorch_model-00029.bin",
+    "model.layers.34.pre_attn_norm.scale": "pytorch_model-00029.bin",
+    "model.layers.34.post_attn_norm.scale": "pytorch_model-00029.bin",
+    "model.layers.34.pre_moe_norm.scale": "pytorch_model-00029.bin",
+    "model.layers.34.post_moe_norm.scale": "pytorch_model-00029.bin",
+    "model.layers.34.moe_block.gate.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.0.linear.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.1.linear.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.2.linear.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.3.linear.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.4.linear.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.5.linear.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.6.linear.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.7.linear.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.0.linear_1.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.1.linear_1.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.2.linear_1.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.3.linear_1.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.4.linear_1.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.5.linear_1.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.6.linear_1.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.7.linear_1.weight": "pytorch_model-00029.bin",
+    "model.layers.35.moe_block.experts.0.linear_v.weight": "pytorch_model-00030.bin",
+    "model.layers.35.moe_block.experts.1.linear_v.weight": "pytorch_model-00030.bin",
+    "model.layers.35.moe_block.experts.2.linear_v.weight": "pytorch_model-00030.bin",
+    "model.layers.35.moe_block.experts.3.linear_v.weight": "pytorch_model-00030.bin",
+    "model.layers.35.moe_block.experts.4.linear_v.weight": "pytorch_model-00030.bin",
+    "model.layers.35.moe_block.experts.5.linear_v.weight": "pytorch_model-00030.bin",
+    "model.layers.35.moe_block.experts.6.linear_v.weight": "pytorch_model-00030.bin",
+    "model.layers.35.moe_block.experts.7.linear_v.weight": "pytorch_model-00030.bin",
+    "model.layers.35.attn.k_proj.weight": "pytorch_model-00030.bin",
+    "model.layers.35.attn.o_proj.weight": "pytorch_model-00030.bin",
+    "model.layers.35.attn.q_proj.weight": "pytorch_model-00030.bin",
+    "model.layers.35.attn.v_proj.weight": "pytorch_model-00030.bin",
+    "model.layers.35.pre_attn_norm.scale": "pytorch_model-00030.bin",
+    "model.layers.35.post_attn_norm.scale": "pytorch_model-00030.bin",
+    "model.layers.35.pre_moe_norm.scale": "pytorch_model-00030.bin",
+    "model.layers.35.post_moe_norm.scale": "pytorch_model-00030.bin",
+    "model.layers.35.moe_block.gate.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.0.linear.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.1.linear.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.2.linear.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.3.linear.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.4.linear.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.5.linear.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.6.linear.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.7.linear.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.0.linear_1.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.1.linear_1.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.2.linear_1.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.3.linear_1.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.4.linear_1.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.5.linear_1.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.6.linear_1.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.7.linear_1.weight": "pytorch_model-00030.bin",
+    "model.layers.36.moe_block.experts.0.linear_v.weight": "pytorch_model-00031.bin",
+    "model.layers.36.moe_block.experts.1.linear_v.weight": "pytorch_model-00031.bin",
+    "model.layers.36.moe_block.experts.2.linear_v.weight": "pytorch_model-00031.bin",
+    "model.layers.36.moe_block.experts.3.linear_v.weight": "pytorch_model-00031.bin",
+    "model.layers.36.moe_block.experts.4.linear_v.weight": "pytorch_model-00031.bin",
+    "model.layers.36.moe_block.experts.5.linear_v.weight": "pytorch_model-00031.bin",
+    "model.layers.36.moe_block.experts.6.linear_v.weight": "pytorch_model-00031.bin",
+    "model.layers.36.moe_block.experts.7.linear_v.weight": "pytorch_model-00031.bin",
+    "model.layers.36.attn.k_proj.weight": "pytorch_model-00031.bin",
+    "model.layers.36.attn.o_proj.weight": "pytorch_model-00031.bin",
+    "model.layers.36.attn.q_proj.weight": "pytorch_model-00031.bin",
+    "model.layers.36.attn.v_proj.weight": "pytorch_model-00031.bin",
+    "model.layers.36.pre_attn_norm.scale": "pytorch_model-00031.bin",
+    "model.layers.36.post_attn_norm.scale": "pytorch_model-00031.bin",
+    "model.layers.36.pre_moe_norm.scale": "pytorch_model-00031.bin",
+    "model.layers.36.post_moe_norm.scale": "pytorch_model-00031.bin",
+    "model.layers.36.moe_block.gate.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.0.linear.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.1.linear.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.2.linear.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.3.linear.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.4.linear.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.5.linear.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.6.linear.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.7.linear.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.0.linear_1.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.1.linear_1.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.2.linear_1.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.3.linear_1.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.4.linear_1.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.5.linear_1.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.6.linear_1.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.7.linear_1.weight": "pytorch_model-00031.bin",
+    "model.layers.37.moe_block.experts.0.linear_v.weight": "pytorch_model-00032.bin",
+    "model.layers.37.moe_block.experts.1.linear_v.weight": "pytorch_model-00032.bin",
+    "model.layers.37.moe_block.experts.2.linear_v.weight": "pytorch_model-00032.bin",
+    "model.layers.37.moe_block.experts.3.linear_v.weight": "pytorch_model-00032.bin",
+    "model.layers.37.moe_block.experts.4.linear_v.weight": "pytorch_model-00032.bin",
+    "model.layers.37.moe_block.experts.5.linear_v.weight": "pytorch_model-00032.bin",
+    "model.layers.37.moe_block.experts.6.linear_v.weight": "pytorch_model-00032.bin",
+    "model.layers.37.moe_block.experts.7.linear_v.weight": "pytorch_model-00032.bin",
+    "model.layers.37.attn.k_proj.weight": "pytorch_model-00032.bin",
+    "model.layers.37.attn.o_proj.weight": "pytorch_model-00032.bin",
+    "model.layers.37.attn.q_proj.weight": "pytorch_model-00032.bin",
+    "model.layers.37.attn.v_proj.weight": "pytorch_model-00032.bin",
+    "model.layers.37.pre_attn_norm.scale": "pytorch_model-00032.bin",
+    "model.layers.37.post_attn_norm.scale": "pytorch_model-00032.bin",
+    "model.layers.37.pre_moe_norm.scale": "pytorch_model-00032.bin",
+    "model.layers.37.post_moe_norm.scale": "pytorch_model-00032.bin",
+    "model.layers.37.moe_block.gate.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.0.linear.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.1.linear.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.2.linear.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.3.linear.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.4.linear.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.5.linear.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.6.linear.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.7.linear.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.0.linear_1.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.1.linear_1.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.2.linear_1.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.3.linear_1.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.4.linear_1.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.5.linear_1.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.6.linear_1.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.7.linear_1.weight": "pytorch_model-00032.bin",
+    "model.layers.38.moe_block.experts.0.linear_v.weight": "pytorch_model-00033.bin",
+    "model.layers.38.moe_block.experts.1.linear_v.weight": "pytorch_model-00033.bin",
+    "model.layers.38.moe_block.experts.2.linear_v.weight": "pytorch_model-00033.bin",
+    "model.layers.38.moe_block.experts.3.linear_v.weight": "pytorch_model-00033.bin",
+    "model.layers.38.moe_block.experts.4.linear_v.weight": "pytorch_model-00033.bin",
+    "model.layers.38.moe_block.experts.5.linear_v.weight": "pytorch_model-00033.bin",
+    "model.layers.38.moe_block.experts.6.linear_v.weight": "pytorch_model-00033.bin",
+    "model.layers.38.moe_block.experts.7.linear_v.weight": "pytorch_model-00033.bin",
+    "model.layers.38.attn.k_proj.weight": "pytorch_model-00033.bin",
+    "model.layers.38.attn.o_proj.weight": "pytorch_model-00033.bin",
+    "model.layers.38.attn.q_proj.weight": "pytorch_model-00033.bin",
+    "model.layers.38.attn.v_proj.weight": "pytorch_model-00033.bin",
+    "model.layers.38.pre_attn_norm.scale": "pytorch_model-00033.bin",
+    "model.layers.38.post_attn_norm.scale": "pytorch_model-00033.bin",
+    "model.layers.38.pre_moe_norm.scale": "pytorch_model-00033.bin",
+    "model.layers.38.post_moe_norm.scale": "pytorch_model-00033.bin",
+    "model.layers.38.moe_block.gate.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.0.linear.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.1.linear.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.2.linear.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.3.linear.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.4.linear.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.5.linear.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.6.linear.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.7.linear.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.0.linear_1.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.1.linear_1.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.2.linear_1.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.3.linear_1.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.4.linear_1.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.5.linear_1.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.6.linear_1.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.7.linear_1.weight": "pytorch_model-00033.bin",
+    "model.layers.39.moe_block.experts.0.linear_v.weight": "pytorch_model-00034.bin",
+    "model.layers.39.moe_block.experts.1.linear_v.weight": "pytorch_model-00034.bin",
+    "model.layers.39.moe_block.experts.2.linear_v.weight": "pytorch_model-00034.bin",
+    "model.layers.39.moe_block.experts.3.linear_v.weight": "pytorch_model-00034.bin",
+    "model.layers.39.moe_block.experts.4.linear_v.weight": "pytorch_model-00034.bin",
+    "model.layers.39.moe_block.experts.5.linear_v.weight": "pytorch_model-00034.bin",
+    "model.layers.39.moe_block.experts.6.linear_v.weight": "pytorch_model-00034.bin",
+    "model.layers.39.moe_block.experts.7.linear_v.weight": "pytorch_model-00034.bin",
+    "model.layers.39.attn.k_proj.weight": "pytorch_model-00034.bin",
+    "model.layers.39.attn.o_proj.weight": "pytorch_model-00034.bin",
+    "model.layers.39.attn.q_proj.weight": "pytorch_model-00034.bin",
+    "model.layers.39.attn.v_proj.weight": "pytorch_model-00034.bin",
+    "model.layers.39.pre_attn_norm.scale": "pytorch_model-00034.bin",
+    "model.layers.39.post_attn_norm.scale": "pytorch_model-00034.bin",
+    "model.layers.39.pre_moe_norm.scale": "pytorch_model-00034.bin",
+    "model.layers.39.post_moe_norm.scale": "pytorch_model-00034.bin",
+    "model.layers.39.moe_block.gate.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.0.linear.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.1.linear.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.2.linear.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.3.linear.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.4.linear.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.5.linear.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.6.linear.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.7.linear.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.0.linear_1.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.1.linear_1.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.2.linear_1.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.3.linear_1.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.4.linear_1.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.5.linear_1.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.6.linear_1.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.7.linear_1.weight": "pytorch_model-00034.bin",
+    "model.layers.4.moe_block.experts.0.linear_v.weight": "pytorch_model-00035.bin",
+    "model.layers.4.moe_block.experts.1.linear_v.weight": "pytorch_model-00035.bin",
+    "model.layers.4.moe_block.experts.2.linear_v.weight": "pytorch_model-00035.bin",
+    "model.layers.4.moe_block.experts.3.linear_v.weight": "pytorch_model-00035.bin",
+    "model.layers.4.moe_block.experts.4.linear_v.weight": "pytorch_model-00035.bin",
+    "model.layers.4.moe_block.experts.5.linear_v.weight": "pytorch_model-00035.bin",
+    "model.layers.4.moe_block.experts.6.linear_v.weight": "pytorch_model-00035.bin",
+    "model.layers.4.moe_block.experts.7.linear_v.weight": "pytorch_model-00035.bin",
+    "model.layers.4.attn.k_proj.weight": "pytorch_model-00035.bin",
+    "model.layers.4.attn.o_proj.weight": "pytorch_model-00035.bin",
+    "model.layers.4.attn.q_proj.weight": "pytorch_model-00035.bin",
+    "model.layers.4.attn.v_proj.weight": "pytorch_model-00035.bin",
+    "model.layers.4.pre_attn_norm.scale": "pytorch_model-00035.bin",
+    "model.layers.4.post_attn_norm.scale": "pytorch_model-00035.bin",
+    "model.layers.4.pre_moe_norm.scale": "pytorch_model-00035.bin",
+    "model.layers.4.post_moe_norm.scale": "pytorch_model-00035.bin",
+    "model.layers.4.moe_block.gate.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.0.linear.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.1.linear.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.2.linear.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.3.linear.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.4.linear.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.5.linear.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.6.linear.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.7.linear.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.0.linear_1.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.1.linear_1.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.2.linear_1.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.3.linear_1.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.4.linear_1.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.5.linear_1.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.6.linear_1.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.7.linear_1.weight": "pytorch_model-00035.bin",
+    "model.layers.40.moe_block.experts.0.linear_v.weight": "pytorch_model-00036.bin",
+    "model.layers.40.moe_block.experts.1.linear_v.weight": "pytorch_model-00036.bin",
+    "model.layers.40.moe_block.experts.2.linear_v.weight": "pytorch_model-00036.bin",
+    "model.layers.40.moe_block.experts.3.linear_v.weight": "pytorch_model-00036.bin",
+    "model.layers.40.moe_block.experts.4.linear_v.weight": "pytorch_model-00036.bin",
+    "model.layers.40.moe_block.experts.5.linear_v.weight": "pytorch_model-00036.bin",
+    "model.layers.40.moe_block.experts.6.linear_v.weight": "pytorch_model-00036.bin",
+    "model.layers.40.moe_block.experts.7.linear_v.weight": "pytorch_model-00036.bin",
+    "model.layers.40.attn.k_proj.weight": "pytorch_model-00036.bin",
+    "model.layers.40.attn.o_proj.weight": "pytorch_model-00036.bin",
+    "model.layers.40.attn.q_proj.weight": "pytorch_model-00036.bin",
+    "model.layers.40.attn.v_proj.weight": "pytorch_model-00036.bin",
+    "model.layers.40.pre_attn_norm.scale": "pytorch_model-00036.bin",
+    "model.layers.40.post_attn_norm.scale": "pytorch_model-00036.bin",
+    "model.layers.40.pre_moe_norm.scale": "pytorch_model-00036.bin",
+    "model.layers.40.post_moe_norm.scale": "pytorch_model-00036.bin",
+    "model.layers.40.moe_block.gate.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.0.linear.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.1.linear.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.2.linear.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.3.linear.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.4.linear.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.5.linear.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.6.linear.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.7.linear.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.0.linear_1.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.1.linear_1.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.2.linear_1.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.3.linear_1.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.4.linear_1.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.5.linear_1.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.6.linear_1.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.7.linear_1.weight": "pytorch_model-00036.bin",
+    "model.layers.41.moe_block.experts.0.linear_v.weight": "pytorch_model-00037.bin",
+    "model.layers.41.moe_block.experts.1.linear_v.weight": "pytorch_model-00037.bin",
+    "model.layers.41.moe_block.experts.2.linear_v.weight": "pytorch_model-00037.bin",
+    "model.layers.41.moe_block.experts.3.linear_v.weight": "pytorch_model-00037.bin",
+    "model.layers.41.moe_block.experts.4.linear_v.weight": "pytorch_model-00037.bin",
+    "model.layers.41.moe_block.experts.5.linear_v.weight": "pytorch_model-00037.bin",
+    "model.layers.41.moe_block.experts.6.linear_v.weight": "pytorch_model-00037.bin",
+    "model.layers.41.moe_block.experts.7.linear_v.weight": "pytorch_model-00037.bin",
+    "model.layers.41.attn.k_proj.weight": "pytorch_model-00037.bin",
+    "model.layers.41.attn.o_proj.weight": "pytorch_model-00037.bin",
+    "model.layers.41.attn.q_proj.weight": "pytorch_model-00037.bin",
+    "model.layers.41.attn.v_proj.weight": "pytorch_model-00037.bin",
+    "model.layers.41.pre_attn_norm.scale": "pytorch_model-00037.bin",
+    "model.layers.41.post_attn_norm.scale": "pytorch_model-00037.bin",
+    "model.layers.41.pre_moe_norm.scale": "pytorch_model-00037.bin",
+    "model.layers.41.post_moe_norm.scale": "pytorch_model-00037.bin",
+    "model.layers.41.moe_block.gate.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.0.linear.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.1.linear.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.2.linear.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.3.linear.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.4.linear.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.5.linear.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.6.linear.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.7.linear.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.0.linear_1.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.1.linear_1.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.2.linear_1.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.3.linear_1.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.4.linear_1.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.5.linear_1.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.6.linear_1.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.7.linear_1.weight": "pytorch_model-00037.bin",
+    "model.layers.42.moe_block.experts.0.linear_v.weight": "pytorch_model-00038.bin",
+    "model.layers.42.moe_block.experts.1.linear_v.weight": "pytorch_model-00038.bin",
+    "model.layers.42.moe_block.experts.2.linear_v.weight": "pytorch_model-00038.bin",
+    "model.layers.42.moe_block.experts.3.linear_v.weight": "pytorch_model-00038.bin",
+    "model.layers.42.moe_block.experts.4.linear_v.weight": "pytorch_model-00038.bin",
+    "model.layers.42.moe_block.experts.5.linear_v.weight": "pytorch_model-00038.bin",
+    "model.layers.42.moe_block.experts.6.linear_v.weight": "pytorch_model-00038.bin",
+    "model.layers.42.moe_block.experts.7.linear_v.weight": "pytorch_model-00038.bin",
+    "model.layers.42.attn.k_proj.weight": "pytorch_model-00038.bin",
+    "model.layers.42.attn.o_proj.weight": "pytorch_model-00038.bin",
+    "model.layers.42.attn.q_proj.weight": "pytorch_model-00038.bin",
+    "model.layers.42.attn.v_proj.weight": "pytorch_model-00038.bin",
+    "model.layers.42.pre_attn_norm.scale": "pytorch_model-00038.bin",
+    "model.layers.42.post_attn_norm.scale": "pytorch_model-00038.bin",
+    "model.layers.42.pre_moe_norm.scale": "pytorch_model-00038.bin",
+    "model.layers.42.post_moe_norm.scale": "pytorch_model-00038.bin",
+    "model.layers.42.moe_block.gate.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.0.linear.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.1.linear.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.2.linear.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.3.linear.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.4.linear.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.5.linear.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.6.linear.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.7.linear.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.0.linear_1.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.1.linear_1.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.2.linear_1.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.3.linear_1.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.4.linear_1.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.5.linear_1.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.6.linear_1.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.7.linear_1.weight": "pytorch_model-00038.bin",
+    "model.layers.43.moe_block.experts.0.linear_v.weight": "pytorch_model-00039.bin",
+    "model.layers.43.moe_block.experts.1.linear_v.weight": "pytorch_model-00039.bin",
+    "model.layers.43.moe_block.experts.2.linear_v.weight": "pytorch_model-00039.bin",
+    "model.layers.43.moe_block.experts.3.linear_v.weight": "pytorch_model-00039.bin",
+    "model.layers.43.moe_block.experts.4.linear_v.weight": "pytorch_model-00039.bin",
+    "model.layers.43.moe_block.experts.5.linear_v.weight": "pytorch_model-00039.bin",
+    "model.layers.43.moe_block.experts.6.linear_v.weight": "pytorch_model-00039.bin",
+    "model.layers.43.moe_block.experts.7.linear_v.weight": "pytorch_model-00039.bin",
+    "model.layers.43.attn.k_proj.weight": "pytorch_model-00039.bin",
+    "model.layers.43.attn.o_proj.weight": "pytorch_model-00039.bin",
+    "model.layers.43.attn.q_proj.weight": "pytorch_model-00039.bin",
+    "model.layers.43.attn.v_proj.weight": "pytorch_model-00039.bin",
+    "model.layers.43.pre_attn_norm.scale": "pytorch_model-00039.bin",
+    "model.layers.43.post_attn_norm.scale": "pytorch_model-00039.bin",
+    "model.layers.43.pre_moe_norm.scale": "pytorch_model-00039.bin",
+    "model.layers.43.post_moe_norm.scale": "pytorch_model-00039.bin",
+    "model.layers.43.moe_block.gate.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.0.linear.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.1.linear.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.2.linear.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.3.linear.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.4.linear.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.5.linear.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.6.linear.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.7.linear.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.0.linear_1.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.1.linear_1.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.2.linear_1.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.3.linear_1.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.4.linear_1.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.5.linear_1.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.6.linear_1.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.7.linear_1.weight": "pytorch_model-00039.bin",
+    "model.layers.44.moe_block.experts.0.linear_v.weight": "pytorch_model-00040.bin",
+    "model.layers.44.moe_block.experts.1.linear_v.weight": "pytorch_model-00040.bin",
+    "model.layers.44.moe_block.experts.2.linear_v.weight": "pytorch_model-00040.bin",
+    "model.layers.44.moe_block.experts.3.linear_v.weight": "pytorch_model-00040.bin",
+    "model.layers.44.moe_block.experts.4.linear_v.weight": "pytorch_model-00040.bin",
+    "model.layers.44.moe_block.experts.5.linear_v.weight": "pytorch_model-00040.bin",
+    "model.layers.44.moe_block.experts.6.linear_v.weight": "pytorch_model-00040.bin",
+    "model.layers.44.moe_block.experts.7.linear_v.weight": "pytorch_model-00040.bin",
+    "model.layers.44.attn.k_proj.weight": "pytorch_model-00040.bin",
+    "model.layers.44.attn.o_proj.weight": "pytorch_model-00040.bin",
+    "model.layers.44.attn.q_proj.weight": "pytorch_model-00040.bin",
+    "model.layers.44.attn.v_proj.weight": "pytorch_model-00040.bin",
+    "model.layers.44.pre_attn_norm.scale": "pytorch_model-00040.bin",
+    "model.layers.44.post_attn_norm.scale": "pytorch_model-00040.bin",
+    "model.layers.44.pre_moe_norm.scale": "pytorch_model-00040.bin",
+    "model.layers.44.post_moe_norm.scale": "pytorch_model-00040.bin",
+    "model.layers.44.moe_block.gate.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.0.linear.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.1.linear.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.2.linear.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.3.linear.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.4.linear.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.5.linear.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.6.linear.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.7.linear.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.0.linear_1.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.1.linear_1.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.2.linear_1.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.3.linear_1.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.4.linear_1.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.5.linear_1.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.6.linear_1.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.7.linear_1.weight": "pytorch_model-00040.bin",
+    "model.layers.45.moe_block.experts.0.linear_v.weight": "pytorch_model-00041.bin",
+    "model.layers.45.moe_block.experts.1.linear_v.weight": "pytorch_model-00041.bin",
+    "model.layers.45.moe_block.experts.2.linear_v.weight": "pytorch_model-00041.bin",
+    "model.layers.45.moe_block.experts.3.linear_v.weight": "pytorch_model-00041.bin",
+    "model.layers.45.moe_block.experts.4.linear_v.weight": "pytorch_model-00041.bin",
+    "model.layers.45.moe_block.experts.5.linear_v.weight": "pytorch_model-00041.bin",
+    "model.layers.45.moe_block.experts.6.linear_v.weight": "pytorch_model-00041.bin",
+    "model.layers.45.moe_block.experts.7.linear_v.weight": "pytorch_model-00041.bin",
+    "model.layers.45.attn.k_proj.weight": "pytorch_model-00041.bin",
+    "model.layers.45.attn.o_proj.weight": "pytorch_model-00041.bin",
+    "model.layers.45.attn.q_proj.weight": "pytorch_model-00041.bin",
+    "model.layers.45.attn.v_proj.weight": "pytorch_model-00041.bin",
+    "model.layers.45.pre_attn_norm.scale": "pytorch_model-00041.bin",
+    "model.layers.45.post_attn_norm.scale": "pytorch_model-00041.bin",
+    "model.layers.45.pre_moe_norm.scale": "pytorch_model-00041.bin",
+    "model.layers.45.post_moe_norm.scale": "pytorch_model-00041.bin",
+    "model.layers.45.moe_block.gate.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.0.linear.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.1.linear.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.2.linear.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.3.linear.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.4.linear.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.5.linear.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.6.linear.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.7.linear.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.0.linear_1.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.1.linear_1.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.2.linear_1.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.3.linear_1.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.4.linear_1.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.5.linear_1.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.6.linear_1.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.7.linear_1.weight": "pytorch_model-00041.bin",
+    "model.layers.46.moe_block.experts.0.linear_v.weight": "pytorch_model-00042.bin",
+    "model.layers.46.moe_block.experts.1.linear_v.weight": "pytorch_model-00042.bin",
+    "model.layers.46.moe_block.experts.2.linear_v.weight": "pytorch_model-00042.bin",
+    "model.layers.46.moe_block.experts.3.linear_v.weight": "pytorch_model-00042.bin",
+    "model.layers.46.moe_block.experts.4.linear_v.weight": "pytorch_model-00042.bin",
+    "model.layers.46.moe_block.experts.5.linear_v.weight": "pytorch_model-00042.bin",
+    "model.layers.46.moe_block.experts.6.linear_v.weight": "pytorch_model-00042.bin",
+    "model.layers.46.moe_block.experts.7.linear_v.weight": "pytorch_model-00042.bin",
+    "model.layers.46.attn.k_proj.weight": "pytorch_model-00042.bin",
+    "model.layers.46.attn.o_proj.weight": "pytorch_model-00042.bin",
+    "model.layers.46.attn.q_proj.weight": "pytorch_model-00042.bin",
+    "model.layers.46.attn.v_proj.weight": "pytorch_model-00042.bin",
+    "model.layers.46.pre_attn_norm.scale": "pytorch_model-00042.bin",
+    "model.layers.46.post_attn_norm.scale": "pytorch_model-00042.bin",
+    "model.layers.46.pre_moe_norm.scale": "pytorch_model-00042.bin",
+    "model.layers.46.post_moe_norm.scale": "pytorch_model-00042.bin",
+    "model.layers.46.moe_block.gate.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.0.linear.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.1.linear.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.2.linear.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.3.linear.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.4.linear.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.5.linear.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.6.linear.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.7.linear.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.0.linear_1.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.1.linear_1.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.2.linear_1.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.3.linear_1.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.4.linear_1.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.5.linear_1.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.6.linear_1.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.7.linear_1.weight": "pytorch_model-00042.bin",
+    "model.layers.47.moe_block.experts.0.linear_v.weight": "pytorch_model-00043.bin",
+    "model.layers.47.moe_block.experts.1.linear_v.weight": "pytorch_model-00043.bin",
+    "model.layers.47.moe_block.experts.2.linear_v.weight": "pytorch_model-00043.bin",
+    "model.layers.47.moe_block.experts.3.linear_v.weight": "pytorch_model-00043.bin",
+    "model.layers.47.moe_block.experts.4.linear_v.weight": "pytorch_model-00043.bin",
+    "model.layers.47.moe_block.experts.5.linear_v.weight": "pytorch_model-00043.bin",
+    "model.layers.47.moe_block.experts.6.linear_v.weight": "pytorch_model-00043.bin",
+    "model.layers.47.moe_block.experts.7.linear_v.weight": "pytorch_model-00043.bin",
+    "model.layers.47.attn.k_proj.weight": "pytorch_model-00043.bin",
+    "model.layers.47.attn.o_proj.weight": "pytorch_model-00043.bin",
+    "model.layers.47.attn.q_proj.weight": "pytorch_model-00043.bin",
+    "model.layers.47.attn.v_proj.weight": "pytorch_model-00043.bin",
+    "model.layers.47.pre_attn_norm.scale": "pytorch_model-00043.bin",
+    "model.layers.47.post_attn_norm.scale": "pytorch_model-00043.bin",
+    "model.layers.47.pre_moe_norm.scale": "pytorch_model-00043.bin",
+    "model.layers.47.post_moe_norm.scale": "pytorch_model-00043.bin",
+    "model.layers.47.moe_block.gate.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.0.linear.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.1.linear.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.2.linear.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.3.linear.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.4.linear.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.5.linear.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.6.linear.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.7.linear.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.0.linear_1.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.1.linear_1.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.2.linear_1.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.3.linear_1.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.4.linear_1.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.5.linear_1.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.6.linear_1.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.7.linear_1.weight": "pytorch_model-00043.bin",
+    "model.layers.48.moe_block.experts.0.linear_v.weight": "pytorch_model-00044.bin",
+    "model.layers.48.moe_block.experts.1.linear_v.weight": "pytorch_model-00044.bin",
+    "model.layers.48.moe_block.experts.2.linear_v.weight": "pytorch_model-00044.bin",
+    "model.layers.48.moe_block.experts.3.linear_v.weight": "pytorch_model-00044.bin",
+    "model.layers.48.moe_block.experts.4.linear_v.weight": "pytorch_model-00044.bin",
+    "model.layers.48.moe_block.experts.5.linear_v.weight": "pytorch_model-00044.bin",
+    "model.layers.48.moe_block.experts.6.linear_v.weight": "pytorch_model-00044.bin",
+    "model.layers.48.moe_block.experts.7.linear_v.weight": "pytorch_model-00044.bin",
+    "model.layers.48.attn.k_proj.weight": "pytorch_model-00044.bin",
+    "model.layers.48.attn.o_proj.weight": "pytorch_model-00044.bin",
+    "model.layers.48.attn.q_proj.weight": "pytorch_model-00044.bin",
+    "model.layers.48.attn.v_proj.weight": "pytorch_model-00044.bin",
+    "model.layers.48.pre_attn_norm.scale": "pytorch_model-00044.bin",
+    "model.layers.48.post_attn_norm.scale": "pytorch_model-00044.bin",
+    "model.layers.48.pre_moe_norm.scale": "pytorch_model-00044.bin",
+    "model.layers.48.post_moe_norm.scale": "pytorch_model-00044.bin",
+    "model.layers.48.moe_block.gate.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.0.linear.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.1.linear.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.2.linear.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.3.linear.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.4.linear.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.5.linear.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.6.linear.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.7.linear.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.0.linear_1.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.1.linear_1.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.2.linear_1.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.3.linear_1.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.4.linear_1.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.5.linear_1.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.6.linear_1.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.7.linear_1.weight": "pytorch_model-00044.bin",
+    "model.layers.49.moe_block.experts.0.linear_v.weight": "pytorch_model-00045.bin",
+    "model.layers.49.moe_block.experts.1.linear_v.weight": "pytorch_model-00045.bin",
+    "model.layers.49.moe_block.experts.2.linear_v.weight": "pytorch_model-00045.bin",
+    "model.layers.49.moe_block.experts.3.linear_v.weight": "pytorch_model-00045.bin",
+    "model.layers.49.moe_block.experts.4.linear_v.weight": "pytorch_model-00045.bin",
+    "model.layers.49.moe_block.experts.5.linear_v.weight": "pytorch_model-00045.bin",
+    "model.layers.49.moe_block.experts.6.linear_v.weight": "pytorch_model-00045.bin",
+    "model.layers.49.moe_block.experts.7.linear_v.weight": "pytorch_model-00045.bin",
+    "model.layers.49.attn.k_proj.weight": "pytorch_model-00045.bin",
+    "model.layers.49.attn.o_proj.weight": "pytorch_model-00045.bin",
+    "model.layers.49.attn.q_proj.weight": "pytorch_model-00045.bin",
+    "model.layers.49.attn.v_proj.weight": "pytorch_model-00045.bin",
+    "model.layers.49.pre_attn_norm.scale": "pytorch_model-00045.bin",
+    "model.layers.49.post_attn_norm.scale": "pytorch_model-00045.bin",
+    "model.layers.49.pre_moe_norm.scale": "pytorch_model-00045.bin",
+    "model.layers.49.post_moe_norm.scale": "pytorch_model-00045.bin",
+    "model.layers.49.moe_block.gate.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.0.linear.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.1.linear.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.2.linear.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.3.linear.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.4.linear.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.5.linear.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.6.linear.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.7.linear.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.0.linear_1.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.1.linear_1.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.2.linear_1.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.3.linear_1.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.4.linear_1.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.5.linear_1.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.6.linear_1.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.7.linear_1.weight": "pytorch_model-00045.bin",
+    "model.layers.5.moe_block.experts.0.linear_v.weight": "pytorch_model-00046.bin",
+    "model.layers.5.moe_block.experts.1.linear_v.weight": "pytorch_model-00046.bin",
+    "model.layers.5.moe_block.experts.2.linear_v.weight": "pytorch_model-00046.bin",
+    "model.layers.5.moe_block.experts.3.linear_v.weight": "pytorch_model-00046.bin",
+    "model.layers.5.moe_block.experts.4.linear_v.weight": "pytorch_model-00046.bin",
+    "model.layers.5.moe_block.experts.5.linear_v.weight": "pytorch_model-00046.bin",
+    "model.layers.5.moe_block.experts.6.linear_v.weight": "pytorch_model-00046.bin",
+    "model.layers.5.moe_block.experts.7.linear_v.weight": "pytorch_model-00046.bin",
+    "model.layers.5.attn.k_proj.weight": "pytorch_model-00046.bin",
+    "model.layers.5.attn.o_proj.weight": "pytorch_model-00046.bin",
+    "model.layers.5.attn.q_proj.weight": "pytorch_model-00046.bin",
+    "model.layers.5.attn.v_proj.weight": "pytorch_model-00046.bin",
+    "model.layers.5.pre_attn_norm.scale": "pytorch_model-00046.bin",
+    "model.layers.5.post_attn_norm.scale": "pytorch_model-00046.bin",
+    "model.layers.5.pre_moe_norm.scale": "pytorch_model-00046.bin",
+    "model.layers.5.post_moe_norm.scale": "pytorch_model-00046.bin",
+    "model.layers.5.moe_block.gate.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.0.linear.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.1.linear.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.2.linear.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.3.linear.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.4.linear.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.5.linear.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.6.linear.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.7.linear.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.0.linear_1.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.1.linear_1.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.2.linear_1.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.3.linear_1.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.4.linear_1.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.5.linear_1.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.6.linear_1.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.7.linear_1.weight": "pytorch_model-00046.bin",
+    "model.layers.50.moe_block.experts.0.linear_v.weight": "pytorch_model-00047.bin",
+    "model.layers.50.moe_block.experts.1.linear_v.weight": "pytorch_model-00047.bin",
+    "model.layers.50.moe_block.experts.2.linear_v.weight": "pytorch_model-00047.bin",
+    "model.layers.50.moe_block.experts.3.linear_v.weight": "pytorch_model-00047.bin",
+    "model.layers.50.moe_block.experts.4.linear_v.weight": "pytorch_model-00047.bin",
+    "model.layers.50.moe_block.experts.5.linear_v.weight": "pytorch_model-00047.bin",
+    "model.layers.50.moe_block.experts.6.linear_v.weight": "pytorch_model-00047.bin",
+    "model.layers.50.moe_block.experts.7.linear_v.weight": "pytorch_model-00047.bin",
+    "model.layers.50.attn.k_proj.weight": "pytorch_model-00047.bin",
+    "model.layers.50.attn.o_proj.weight": "pytorch_model-00047.bin",
+    "model.layers.50.attn.q_proj.weight": "pytorch_model-00047.bin",
+    "model.layers.50.attn.v_proj.weight": "pytorch_model-00047.bin",
+    "model.layers.50.pre_attn_norm.scale": "pytorch_model-00047.bin",
+    "model.layers.50.post_attn_norm.scale": "pytorch_model-00047.bin",
+    "model.layers.50.pre_moe_norm.scale": "pytorch_model-00047.bin",
+    "model.layers.50.post_moe_norm.scale": "pytorch_model-00047.bin",
+    "model.layers.50.moe_block.gate.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.0.linear.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.1.linear.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.2.linear.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.3.linear.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.4.linear.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.5.linear.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.6.linear.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.7.linear.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.0.linear_1.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.1.linear_1.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.2.linear_1.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.3.linear_1.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.4.linear_1.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.5.linear_1.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.6.linear_1.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.7.linear_1.weight": "pytorch_model-00047.bin",
+    "model.layers.51.moe_block.experts.0.linear_v.weight": "pytorch_model-00048.bin",
+    "model.layers.51.moe_block.experts.1.linear_v.weight": "pytorch_model-00048.bin",
+    "model.layers.51.moe_block.experts.2.linear_v.weight": "pytorch_model-00048.bin",
+    "model.layers.51.moe_block.experts.3.linear_v.weight": "pytorch_model-00048.bin",
+    "model.layers.51.moe_block.experts.4.linear_v.weight": "pytorch_model-00048.bin",
+    "model.layers.51.moe_block.experts.5.linear_v.weight": "pytorch_model-00048.bin",
+    "model.layers.51.moe_block.experts.6.linear_v.weight": "pytorch_model-00048.bin",
+    "model.layers.51.moe_block.experts.7.linear_v.weight": "pytorch_model-00048.bin",
+    "model.layers.51.attn.k_proj.weight": "pytorch_model-00048.bin",
+    "model.layers.51.attn.o_proj.weight": "pytorch_model-00048.bin",
+    "model.layers.51.attn.q_proj.weight": "pytorch_model-00048.bin",
+    "model.layers.51.attn.v_proj.weight": "pytorch_model-00048.bin",
+    "model.layers.51.pre_attn_norm.scale": "pytorch_model-00048.bin",
+    "model.layers.51.post_attn_norm.scale": "pytorch_model-00048.bin",
+    "model.layers.51.pre_moe_norm.scale": "pytorch_model-00048.bin",
+    "model.layers.51.post_moe_norm.scale": "pytorch_model-00048.bin",
+    "model.layers.51.moe_block.gate.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.0.linear.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.1.linear.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.2.linear.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.3.linear.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.4.linear.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.5.linear.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.6.linear.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.7.linear.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.0.linear_1.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.1.linear_1.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.2.linear_1.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.3.linear_1.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.4.linear_1.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.5.linear_1.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.6.linear_1.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.7.linear_1.weight": "pytorch_model-00048.bin",
+    "model.layers.52.moe_block.experts.0.linear_v.weight": "pytorch_model-00049.bin",
+    "model.layers.52.moe_block.experts.1.linear_v.weight": "pytorch_model-00049.bin",
+    "model.layers.52.moe_block.experts.2.linear_v.weight": "pytorch_model-00049.bin",
+    "model.layers.52.moe_block.experts.3.linear_v.weight": "pytorch_model-00049.bin",
+    "model.layers.52.moe_block.experts.4.linear_v.weight": "pytorch_model-00049.bin",
+    "model.layers.52.moe_block.experts.5.linear_v.weight": "pytorch_model-00049.bin",
+    "model.layers.52.moe_block.experts.6.linear_v.weight": "pytorch_model-00049.bin",
+    "model.layers.52.moe_block.experts.7.linear_v.weight": "pytorch_model-00049.bin",
+    "model.layers.52.attn.k_proj.weight": "pytorch_model-00049.bin",
+    "model.layers.52.attn.o_proj.weight": "pytorch_model-00049.bin",
+    "model.layers.52.attn.q_proj.weight": "pytorch_model-00049.bin",
+    "model.layers.52.attn.v_proj.weight": "pytorch_model-00049.bin",
+    "model.layers.52.pre_attn_norm.scale": "pytorch_model-00049.bin",
+    "model.layers.52.post_attn_norm.scale": "pytorch_model-00049.bin",
+    "model.layers.52.pre_moe_norm.scale": "pytorch_model-00049.bin",
+    "model.layers.52.post_moe_norm.scale": "pytorch_model-00049.bin",
+    "model.layers.52.moe_block.gate.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.0.linear.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.1.linear.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.2.linear.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.3.linear.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.4.linear.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.5.linear.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.6.linear.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.7.linear.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.0.linear_1.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.1.linear_1.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.2.linear_1.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.3.linear_1.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.4.linear_1.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.5.linear_1.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.6.linear_1.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.7.linear_1.weight": "pytorch_model-00049.bin",
+    "model.layers.53.moe_block.experts.0.linear_v.weight": "pytorch_model-00050.bin",
+    "model.layers.53.moe_block.experts.1.linear_v.weight": "pytorch_model-00050.bin",
+    "model.layers.53.moe_block.experts.2.linear_v.weight": "pytorch_model-00050.bin",
+    "model.layers.53.moe_block.experts.3.linear_v.weight": "pytorch_model-00050.bin",
+    "model.layers.53.moe_block.experts.4.linear_v.weight": "pytorch_model-00050.bin",
+    "model.layers.53.moe_block.experts.5.linear_v.weight": "pytorch_model-00050.bin",
+    "model.layers.53.moe_block.experts.6.linear_v.weight": "pytorch_model-00050.bin",
+    "model.layers.53.moe_block.experts.7.linear_v.weight": "pytorch_model-00050.bin",
+    "model.layers.53.attn.k_proj.weight": "pytorch_model-00050.bin",
+    "model.layers.53.attn.o_proj.weight": "pytorch_model-00050.bin",
+    "model.layers.53.attn.q_proj.weight": "pytorch_model-00050.bin",
+    "model.layers.53.attn.v_proj.weight": "pytorch_model-00050.bin",
+    "model.layers.53.pre_attn_norm.scale": "pytorch_model-00050.bin",
+    "model.layers.53.post_attn_norm.scale": "pytorch_model-00050.bin",
+    "model.layers.53.pre_moe_norm.scale": "pytorch_model-00050.bin",
+    "model.layers.53.post_moe_norm.scale": "pytorch_model-00050.bin",
+    "model.layers.53.moe_block.gate.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.0.linear.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.1.linear.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.2.linear.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.3.linear.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.4.linear.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.5.linear.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.6.linear.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.7.linear.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.0.linear_1.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.1.linear_1.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.2.linear_1.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.3.linear_1.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.4.linear_1.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.5.linear_1.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.6.linear_1.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.7.linear_1.weight": "pytorch_model-00050.bin",
+    "model.layers.54.moe_block.experts.0.linear_v.weight": "pytorch_model-00051.bin",
+    "model.layers.54.moe_block.experts.1.linear_v.weight": "pytorch_model-00051.bin",
+    "model.layers.54.moe_block.experts.2.linear_v.weight": "pytorch_model-00051.bin",
+    "model.layers.54.moe_block.experts.3.linear_v.weight": "pytorch_model-00051.bin",
+    "model.layers.54.moe_block.experts.4.linear_v.weight": "pytorch_model-00051.bin",
+    "model.layers.54.moe_block.experts.5.linear_v.weight": "pytorch_model-00051.bin",
+    "model.layers.54.moe_block.experts.6.linear_v.weight": "pytorch_model-00051.bin",
+    "model.layers.54.moe_block.experts.7.linear_v.weight": "pytorch_model-00051.bin",
+    "model.layers.54.attn.k_proj.weight": "pytorch_model-00051.bin",
+    "model.layers.54.attn.o_proj.weight": "pytorch_model-00051.bin",
+    "model.layers.54.attn.q_proj.weight": "pytorch_model-00051.bin",
+    "model.layers.54.attn.v_proj.weight": "pytorch_model-00051.bin",
+    "model.layers.54.pre_attn_norm.scale": "pytorch_model-00051.bin",
+    "model.layers.54.post_attn_norm.scale": "pytorch_model-00051.bin",
+    "model.layers.54.pre_moe_norm.scale": "pytorch_model-00051.bin",
+    "model.layers.54.post_moe_norm.scale": "pytorch_model-00051.bin",
+    "model.layers.54.moe_block.gate.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.0.linear.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.1.linear.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.2.linear.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.3.linear.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.4.linear.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.5.linear.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.6.linear.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.7.linear.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.0.linear_1.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.1.linear_1.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.2.linear_1.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.3.linear_1.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.4.linear_1.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.5.linear_1.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.6.linear_1.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.7.linear_1.weight": "pytorch_model-00051.bin",
+    "model.layers.55.moe_block.experts.0.linear_v.weight": "pytorch_model-00052.bin",
+    "model.layers.55.moe_block.experts.1.linear_v.weight": "pytorch_model-00052.bin",
+    "model.layers.55.moe_block.experts.2.linear_v.weight": "pytorch_model-00052.bin",
+    "model.layers.55.moe_block.experts.3.linear_v.weight": "pytorch_model-00052.bin",
+    "model.layers.55.moe_block.experts.4.linear_v.weight": "pytorch_model-00052.bin",
+    "model.layers.55.moe_block.experts.5.linear_v.weight": "pytorch_model-00052.bin",
+    "model.layers.55.moe_block.experts.6.linear_v.weight": "pytorch_model-00052.bin",
+    "model.layers.55.moe_block.experts.7.linear_v.weight": "pytorch_model-00052.bin",
+    "model.layers.55.attn.k_proj.weight": "pytorch_model-00052.bin",
+    "model.layers.55.attn.o_proj.weight": "pytorch_model-00052.bin",
+    "model.layers.55.attn.q_proj.weight": "pytorch_model-00052.bin",
+    "model.layers.55.attn.v_proj.weight": "pytorch_model-00052.bin",
+    "model.layers.55.pre_attn_norm.scale": "pytorch_model-00052.bin",
+    "model.layers.55.post_attn_norm.scale": "pytorch_model-00052.bin",
+    "model.layers.55.pre_moe_norm.scale": "pytorch_model-00052.bin",
+    "model.layers.55.post_moe_norm.scale": "pytorch_model-00052.bin",
+    "model.layers.55.moe_block.gate.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.0.linear.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.1.linear.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.2.linear.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.3.linear.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.4.linear.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.5.linear.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.6.linear.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.7.linear.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.0.linear_1.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.1.linear_1.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.2.linear_1.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.3.linear_1.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.4.linear_1.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.5.linear_1.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.6.linear_1.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.7.linear_1.weight": "pytorch_model-00052.bin",
+    "model.layers.56.moe_block.experts.0.linear_v.weight": "pytorch_model-00053.bin",
+    "model.layers.56.moe_block.experts.1.linear_v.weight": "pytorch_model-00053.bin",
+    "model.layers.56.moe_block.experts.2.linear_v.weight": "pytorch_model-00053.bin",
+    "model.layers.56.moe_block.experts.3.linear_v.weight": "pytorch_model-00053.bin",
+    "model.layers.56.moe_block.experts.4.linear_v.weight": "pytorch_model-00053.bin",
+    "model.layers.56.moe_block.experts.5.linear_v.weight": "pytorch_model-00053.bin",
+    "model.layers.56.moe_block.experts.6.linear_v.weight": "pytorch_model-00053.bin",
+    "model.layers.56.moe_block.experts.7.linear_v.weight": "pytorch_model-00053.bin",
+    "model.layers.56.attn.k_proj.weight": "pytorch_model-00053.bin",
+    "model.layers.56.attn.o_proj.weight": "pytorch_model-00053.bin",
+    "model.layers.56.attn.q_proj.weight": "pytorch_model-00053.bin",
+    "model.layers.56.attn.v_proj.weight": "pytorch_model-00053.bin",
+    "model.layers.56.pre_attn_norm.scale": "pytorch_model-00053.bin",
+    "model.layers.56.post_attn_norm.scale": "pytorch_model-00053.bin",
+    "model.layers.56.pre_moe_norm.scale": "pytorch_model-00053.bin",
+    "model.layers.56.post_moe_norm.scale": "pytorch_model-00053.bin",
+    "model.layers.56.moe_block.gate.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.0.linear.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.1.linear.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.2.linear.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.3.linear.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.4.linear.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.5.linear.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.6.linear.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.7.linear.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.0.linear_1.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.1.linear_1.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.2.linear_1.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.3.linear_1.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.4.linear_1.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.5.linear_1.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.6.linear_1.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.7.linear_1.weight": "pytorch_model-00053.bin",
+    "model.layers.57.moe_block.experts.0.linear_v.weight": "pytorch_model-00054.bin",
+    "model.layers.57.moe_block.experts.1.linear_v.weight": "pytorch_model-00054.bin",
+    "model.layers.57.moe_block.experts.2.linear_v.weight": "pytorch_model-00054.bin",
+    "model.layers.57.moe_block.experts.3.linear_v.weight": "pytorch_model-00054.bin",
+    "model.layers.57.moe_block.experts.4.linear_v.weight": "pytorch_model-00054.bin",
+    "model.layers.57.moe_block.experts.5.linear_v.weight": "pytorch_model-00054.bin",
+    "model.layers.57.moe_block.experts.6.linear_v.weight": "pytorch_model-00054.bin",
+    "model.layers.57.moe_block.experts.7.linear_v.weight": "pytorch_model-00054.bin",
+    "model.layers.57.attn.k_proj.weight": "pytorch_model-00054.bin",
+    "model.layers.57.attn.o_proj.weight": "pytorch_model-00054.bin",
+    "model.layers.57.attn.q_proj.weight": "pytorch_model-00054.bin",
+    "model.layers.57.attn.v_proj.weight": "pytorch_model-00054.bin",
+    "model.layers.57.pre_attn_norm.scale": "pytorch_model-00054.bin",
+    "model.layers.57.post_attn_norm.scale": "pytorch_model-00054.bin",
+    "model.layers.57.pre_moe_norm.scale": "pytorch_model-00054.bin",
+    "model.layers.57.post_moe_norm.scale": "pytorch_model-00054.bin",
+    "model.layers.57.moe_block.gate.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.0.linear.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.1.linear.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.2.linear.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.3.linear.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.4.linear.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.5.linear.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.6.linear.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.7.linear.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.0.linear_1.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.1.linear_1.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.2.linear_1.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.3.linear_1.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.4.linear_1.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.5.linear_1.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.6.linear_1.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.7.linear_1.weight": "pytorch_model-00054.bin",
+    "model.layers.58.moe_block.experts.0.linear_v.weight": "pytorch_model-00055.bin",
+    "model.layers.58.moe_block.experts.1.linear_v.weight": "pytorch_model-00055.bin",
+    "model.layers.58.moe_block.experts.2.linear_v.weight": "pytorch_model-00055.bin",
+    "model.layers.58.moe_block.experts.3.linear_v.weight": "pytorch_model-00055.bin",
+    "model.layers.58.moe_block.experts.4.linear_v.weight": "pytorch_model-00055.bin",
+    "model.layers.58.moe_block.experts.5.linear_v.weight": "pytorch_model-00055.bin",
+    "model.layers.58.moe_block.experts.6.linear_v.weight": "pytorch_model-00055.bin",
+    "model.layers.58.moe_block.experts.7.linear_v.weight": "pytorch_model-00055.bin",
+    "model.layers.58.attn.k_proj.weight": "pytorch_model-00055.bin",
+    "model.layers.58.attn.o_proj.weight": "pytorch_model-00055.bin",
+    "model.layers.58.attn.q_proj.weight": "pytorch_model-00055.bin",
+    "model.layers.58.attn.v_proj.weight": "pytorch_model-00055.bin",
+    "model.layers.58.pre_attn_norm.scale": "pytorch_model-00055.bin",
+    "model.layers.58.post_attn_norm.scale": "pytorch_model-00055.bin",
+    "model.layers.58.pre_moe_norm.scale": "pytorch_model-00055.bin",
+    "model.layers.58.post_moe_norm.scale": "pytorch_model-00055.bin",
+    "model.layers.58.moe_block.gate.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.0.linear.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.1.linear.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.2.linear.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.3.linear.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.4.linear.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.5.linear.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.6.linear.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.7.linear.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.0.linear_1.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.1.linear_1.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.2.linear_1.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.3.linear_1.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.4.linear_1.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.5.linear_1.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.6.linear_1.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.7.linear_1.weight": "pytorch_model-00055.bin",
+    "model.layers.59.moe_block.experts.0.linear_v.weight": "pytorch_model-00056.bin",
+    "model.layers.59.moe_block.experts.1.linear_v.weight": "pytorch_model-00056.bin",
+    "model.layers.59.moe_block.experts.2.linear_v.weight": "pytorch_model-00056.bin",
+    "model.layers.59.moe_block.experts.3.linear_v.weight": "pytorch_model-00056.bin",
+    "model.layers.59.moe_block.experts.4.linear_v.weight": "pytorch_model-00056.bin",
+    "model.layers.59.moe_block.experts.5.linear_v.weight": "pytorch_model-00056.bin",
+    "model.layers.59.moe_block.experts.6.linear_v.weight": "pytorch_model-00056.bin",
+    "model.layers.59.moe_block.experts.7.linear_v.weight": "pytorch_model-00056.bin",
+    "model.layers.59.attn.k_proj.weight": "pytorch_model-00056.bin",
+    "model.layers.59.attn.o_proj.weight": "pytorch_model-00056.bin",
+    "model.layers.59.attn.q_proj.weight": "pytorch_model-00056.bin",
+    "model.layers.59.attn.v_proj.weight": "pytorch_model-00056.bin",
+    "model.layers.59.pre_attn_norm.scale": "pytorch_model-00056.bin",
+    "model.layers.59.post_attn_norm.scale": "pytorch_model-00056.bin",
+    "model.layers.59.pre_moe_norm.scale": "pytorch_model-00056.bin",
+    "model.layers.59.post_moe_norm.scale": "pytorch_model-00056.bin",
+    "model.layers.59.moe_block.gate.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.0.linear.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.1.linear.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.2.linear.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.3.linear.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.4.linear.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.5.linear.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.6.linear.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.7.linear.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.0.linear_1.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.1.linear_1.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.2.linear_1.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.3.linear_1.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.4.linear_1.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.5.linear_1.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.6.linear_1.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.7.linear_1.weight": "pytorch_model-00056.bin",
+    "model.layers.6.moe_block.experts.0.linear_v.weight": "pytorch_model-00057.bin",
+    "model.layers.6.moe_block.experts.1.linear_v.weight": "pytorch_model-00057.bin",
+    "model.layers.6.moe_block.experts.2.linear_v.weight": "pytorch_model-00057.bin",
+    "model.layers.6.moe_block.experts.3.linear_v.weight": "pytorch_model-00057.bin",
+    "model.layers.6.moe_block.experts.4.linear_v.weight": "pytorch_model-00057.bin",
+    "model.layers.6.moe_block.experts.5.linear_v.weight": "pytorch_model-00057.bin",
+    "model.layers.6.moe_block.experts.6.linear_v.weight": "pytorch_model-00057.bin",
+    "model.layers.6.moe_block.experts.7.linear_v.weight": "pytorch_model-00057.bin",
+    "model.layers.6.attn.k_proj.weight": "pytorch_model-00057.bin",
+    "model.layers.6.attn.o_proj.weight": "pytorch_model-00057.bin",
+    "model.layers.6.attn.q_proj.weight": "pytorch_model-00057.bin",
+    "model.layers.6.attn.v_proj.weight": "pytorch_model-00057.bin",
+    "model.layers.6.pre_attn_norm.scale": "pytorch_model-00057.bin",
+    "model.layers.6.post_attn_norm.scale": "pytorch_model-00057.bin",
+    "model.layers.6.pre_moe_norm.scale": "pytorch_model-00057.bin",
+    "model.layers.6.post_moe_norm.scale": "pytorch_model-00057.bin",
+    "model.layers.6.moe_block.gate.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.0.linear.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.1.linear.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.2.linear.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.3.linear.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.4.linear.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.5.linear.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.6.linear.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.7.linear.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.0.linear_1.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.1.linear_1.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.2.linear_1.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.3.linear_1.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.4.linear_1.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.5.linear_1.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.6.linear_1.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.7.linear_1.weight": "pytorch_model-00057.bin",
+    "model.layers.60.moe_block.experts.0.linear_v.weight": "pytorch_model-00058.bin",
+    "model.layers.60.moe_block.experts.1.linear_v.weight": "pytorch_model-00058.bin",
+    "model.layers.60.moe_block.experts.2.linear_v.weight": "pytorch_model-00058.bin",
+    "model.layers.60.moe_block.experts.3.linear_v.weight": "pytorch_model-00058.bin",
+    "model.layers.60.moe_block.experts.4.linear_v.weight": "pytorch_model-00058.bin",
+    "model.layers.60.moe_block.experts.5.linear_v.weight": "pytorch_model-00058.bin",
+    "model.layers.60.moe_block.experts.6.linear_v.weight": "pytorch_model-00058.bin",
+    "model.layers.60.moe_block.experts.7.linear_v.weight": "pytorch_model-00058.bin",
+    "model.layers.60.attn.k_proj.weight": "pytorch_model-00058.bin",
+    "model.layers.60.attn.o_proj.weight": "pytorch_model-00058.bin",
+    "model.layers.60.attn.q_proj.weight": "pytorch_model-00058.bin",
+    "model.layers.60.attn.v_proj.weight": "pytorch_model-00058.bin",
+    "model.layers.60.pre_attn_norm.scale": "pytorch_model-00058.bin",
+    "model.layers.60.post_attn_norm.scale": "pytorch_model-00058.bin",
+    "model.layers.60.pre_moe_norm.scale": "pytorch_model-00058.bin",
+    "model.layers.60.post_moe_norm.scale": "pytorch_model-00058.bin",
+    "model.layers.60.moe_block.gate.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.0.linear.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.1.linear.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.2.linear.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.3.linear.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.4.linear.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.5.linear.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.6.linear.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.7.linear.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.0.linear_1.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.1.linear_1.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.2.linear_1.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.3.linear_1.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.4.linear_1.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.5.linear_1.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.6.linear_1.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.7.linear_1.weight": "pytorch_model-00058.bin",
+    "model.layers.61.moe_block.experts.0.linear_v.weight": "pytorch_model-00059.bin",
+    "model.layers.61.moe_block.experts.1.linear_v.weight": "pytorch_model-00059.bin",
+    "model.layers.61.moe_block.experts.2.linear_v.weight": "pytorch_model-00059.bin",
+    "model.layers.61.moe_block.experts.3.linear_v.weight": "pytorch_model-00059.bin",
+    "model.layers.61.moe_block.experts.4.linear_v.weight": "pytorch_model-00059.bin",
+    "model.layers.61.moe_block.experts.5.linear_v.weight": "pytorch_model-00059.bin",
+    "model.layers.61.moe_block.experts.6.linear_v.weight": "pytorch_model-00059.bin",
+    "model.layers.61.moe_block.experts.7.linear_v.weight": "pytorch_model-00059.bin",
+    "model.layers.61.attn.k_proj.weight": "pytorch_model-00059.bin",
+    "model.layers.61.attn.o_proj.weight": "pytorch_model-00059.bin",
+    "model.layers.61.attn.q_proj.weight": "pytorch_model-00059.bin",
+    "model.layers.61.attn.v_proj.weight": "pytorch_model-00059.bin",
+    "model.layers.61.pre_attn_norm.scale": "pytorch_model-00059.bin",
+    "model.layers.61.post_attn_norm.scale": "pytorch_model-00059.bin",
+    "model.layers.61.pre_moe_norm.scale": "pytorch_model-00059.bin",
+    "model.layers.61.post_moe_norm.scale": "pytorch_model-00059.bin",
+    "model.layers.61.moe_block.gate.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.0.linear.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.1.linear.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.2.linear.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.3.linear.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.4.linear.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.5.linear.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.6.linear.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.7.linear.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.0.linear_1.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.1.linear_1.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.2.linear_1.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.3.linear_1.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.4.linear_1.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.5.linear_1.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.6.linear_1.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.7.linear_1.weight": "pytorch_model-00059.bin",
+    "model.layers.62.moe_block.experts.0.linear_v.weight": "pytorch_model-00060.bin",
+    "model.layers.62.moe_block.experts.1.linear_v.weight": "pytorch_model-00060.bin",
+    "model.layers.62.moe_block.experts.2.linear_v.weight": "pytorch_model-00060.bin",
+    "model.layers.62.moe_block.experts.3.linear_v.weight": "pytorch_model-00060.bin",
+    "model.layers.62.moe_block.experts.4.linear_v.weight": "pytorch_model-00060.bin",
+    "model.layers.62.moe_block.experts.5.linear_v.weight": "pytorch_model-00060.bin",
+    "model.layers.62.moe_block.experts.6.linear_v.weight": "pytorch_model-00060.bin",
+    "model.layers.62.moe_block.experts.7.linear_v.weight": "pytorch_model-00060.bin",
+    "model.layers.62.attn.k_proj.weight": "pytorch_model-00060.bin",
+    "model.layers.62.attn.o_proj.weight": "pytorch_model-00060.bin",
+    "model.layers.62.attn.q_proj.weight": "pytorch_model-00060.bin",
+    "model.layers.62.attn.v_proj.weight": "pytorch_model-00060.bin",
+    "model.layers.62.pre_attn_norm.scale": "pytorch_model-00060.bin",
+    "model.layers.62.post_attn_norm.scale": "pytorch_model-00060.bin",
+    "model.layers.62.pre_moe_norm.scale": "pytorch_model-00060.bin",
+    "model.layers.62.post_moe_norm.scale": "pytorch_model-00060.bin",
+    "model.layers.62.moe_block.gate.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.0.linear.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.1.linear.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.2.linear.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.3.linear.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.4.linear.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.5.linear.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.6.linear.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.7.linear.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.0.linear_1.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.1.linear_1.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.2.linear_1.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.3.linear_1.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.4.linear_1.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.5.linear_1.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.6.linear_1.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.7.linear_1.weight": "pytorch_model-00060.bin",
+    "model.layers.63.moe_block.experts.0.linear_v.weight": "pytorch_model-00061.bin",
+    "model.layers.63.moe_block.experts.1.linear_v.weight": "pytorch_model-00061.bin",
+    "model.layers.63.moe_block.experts.2.linear_v.weight": "pytorch_model-00061.bin",
+    "model.layers.63.moe_block.experts.3.linear_v.weight": "pytorch_model-00061.bin",
+    "model.layers.63.moe_block.experts.4.linear_v.weight": "pytorch_model-00061.bin",
+    "model.layers.63.moe_block.experts.5.linear_v.weight": "pytorch_model-00061.bin",
+    "model.layers.63.moe_block.experts.6.linear_v.weight": "pytorch_model-00061.bin",
+    "model.layers.63.moe_block.experts.7.linear_v.weight": "pytorch_model-00061.bin",
+    "model.layers.63.attn.k_proj.weight": "pytorch_model-00061.bin",
+    "model.layers.63.attn.o_proj.weight": "pytorch_model-00061.bin",
+    "model.layers.63.attn.q_proj.weight": "pytorch_model-00061.bin",
+    "model.layers.63.attn.v_proj.weight": "pytorch_model-00061.bin",
+    "model.layers.63.pre_attn_norm.scale": "pytorch_model-00061.bin",
+    "model.layers.63.post_attn_norm.scale": "pytorch_model-00061.bin",
+    "model.layers.63.pre_moe_norm.scale": "pytorch_model-00061.bin",
+    "model.layers.63.post_moe_norm.scale": "pytorch_model-00061.bin",
+    "model.layers.63.moe_block.gate.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.0.linear.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.1.linear.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.2.linear.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.3.linear.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.4.linear.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.5.linear.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.6.linear.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.7.linear.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.0.linear_1.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.1.linear_1.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.2.linear_1.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.3.linear_1.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.4.linear_1.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.5.linear_1.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.6.linear_1.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.7.linear_1.weight": "pytorch_model-00061.bin",
+    "model.layers.7.moe_block.experts.0.linear_v.weight": "pytorch_model-00062.bin",
+    "model.layers.7.moe_block.experts.1.linear_v.weight": "pytorch_model-00062.bin",
+    "model.layers.7.moe_block.experts.2.linear_v.weight": "pytorch_model-00062.bin",
+    "model.layers.7.moe_block.experts.3.linear_v.weight": "pytorch_model-00062.bin",
+    "model.layers.7.moe_block.experts.4.linear_v.weight": "pytorch_model-00062.bin",
+    "model.layers.7.moe_block.experts.5.linear_v.weight": "pytorch_model-00062.bin",
+    "model.layers.7.moe_block.experts.6.linear_v.weight": "pytorch_model-00062.bin",
+    "model.layers.7.moe_block.experts.7.linear_v.weight": "pytorch_model-00062.bin",
+    "model.layers.7.attn.k_proj.weight": "pytorch_model-00062.bin",
+    "model.layers.7.attn.o_proj.weight": "pytorch_model-00062.bin",
+    "model.layers.7.attn.q_proj.weight": "pytorch_model-00062.bin",
+    "model.layers.7.attn.v_proj.weight": "pytorch_model-00062.bin",
+    "model.layers.7.pre_attn_norm.scale": "pytorch_model-00062.bin",
+    "model.layers.7.post_attn_norm.scale": "pytorch_model-00062.bin",
+    "model.layers.7.pre_moe_norm.scale": "pytorch_model-00062.bin",
+    "model.layers.7.post_moe_norm.scale": "pytorch_model-00062.bin",
+    "model.layers.7.moe_block.gate.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.0.linear.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.1.linear.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.2.linear.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.3.linear.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.4.linear.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.5.linear.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.6.linear.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.7.linear.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.0.linear_1.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.1.linear_1.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.2.linear_1.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.3.linear_1.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.4.linear_1.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.5.linear_1.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.6.linear_1.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.7.linear_1.weight": "pytorch_model-00062.bin",
+    "model.layers.8.moe_block.experts.0.linear_v.weight": "pytorch_model-00063.bin",
+    "model.layers.8.moe_block.experts.1.linear_v.weight": "pytorch_model-00063.bin",
+    "model.layers.8.moe_block.experts.2.linear_v.weight": "pytorch_model-00063.bin",
+    "model.layers.8.moe_block.experts.3.linear_v.weight": "pytorch_model-00063.bin",
+    "model.layers.8.moe_block.experts.4.linear_v.weight": "pytorch_model-00063.bin",
+    "model.layers.8.moe_block.experts.5.linear_v.weight": "pytorch_model-00063.bin",
+    "model.layers.8.moe_block.experts.6.linear_v.weight": "pytorch_model-00063.bin",
+    "model.layers.8.moe_block.experts.7.linear_v.weight": "pytorch_model-00063.bin",
+    "model.layers.8.attn.k_proj.weight": "pytorch_model-00063.bin",
+    "model.layers.8.attn.o_proj.weight": "pytorch_model-00063.bin",
+    "model.layers.8.attn.q_proj.weight": "pytorch_model-00063.bin",
+    "model.layers.8.attn.v_proj.weight": "pytorch_model-00063.bin",
+    "model.layers.8.pre_attn_norm.scale": "pytorch_model-00063.bin",
+    "model.layers.8.post_attn_norm.scale": "pytorch_model-00063.bin",
+    "model.layers.8.pre_moe_norm.scale": "pytorch_model-00063.bin",
+    "model.layers.8.post_moe_norm.scale": "pytorch_model-00063.bin",
+    "model.layers.8.moe_block.gate.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.0.linear.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.1.linear.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.2.linear.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.3.linear.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.4.linear.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.5.linear.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.6.linear.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.7.linear.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.0.linear_1.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.1.linear_1.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.2.linear_1.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.3.linear_1.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.4.linear_1.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.5.linear_1.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.6.linear_1.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.7.linear_1.weight": "pytorch_model-00063.bin",
+    "model.layers.9.moe_block.experts.0.linear_v.weight": "pytorch_model-00064.bin",
+    "model.layers.9.moe_block.experts.1.linear_v.weight": "pytorch_model-00064.bin",
+    "model.layers.9.moe_block.experts.2.linear_v.weight": "pytorch_model-00064.bin",
+    "model.layers.9.moe_block.experts.3.linear_v.weight": "pytorch_model-00064.bin",
+    "model.layers.9.moe_block.experts.4.linear_v.weight": "pytorch_model-00064.bin",
+    "model.layers.9.moe_block.experts.5.linear_v.weight": "pytorch_model-00064.bin",
+    "model.layers.9.moe_block.experts.6.linear_v.weight": "pytorch_model-00064.bin",
+    "model.layers.9.moe_block.experts.7.linear_v.weight": "pytorch_model-00064.bin",
+    "model.layers.9.attn.k_proj.weight": "pytorch_model-00064.bin",
+    "model.layers.9.attn.o_proj.weight": "pytorch_model-00064.bin",
+    "model.layers.9.attn.q_proj.weight": "pytorch_model-00064.bin",
+    "model.layers.9.attn.v_proj.weight": "pytorch_model-00064.bin",
+    "model.layers.9.pre_attn_norm.scale": "pytorch_model-00064.bin",
+    "model.layers.9.post_attn_norm.scale": "pytorch_model-00064.bin",
+    "model.layers.9.pre_moe_norm.scale": "pytorch_model-00064.bin",
+    "model.layers.9.post_moe_norm.scale": "pytorch_model-00064.bin",
+    "model.layers.9.moe_block.gate.weight": "pytorch_model-00064.bin"
+  }
+}
\ No newline at end of file