diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..f8a579ffc10debc47392fff541cacb301d60a592 --- /dev/null +++ b/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "SiguienteGlobal/mexa-22b", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 6144, + "initializer_range": 0.02, + "intermediate_size": 16384, + "max_position_embeddings": 32768, + "model_type": "mixtral", + "num_attention_heads": 48, + "num_experts_per_tok": 2, + "num_hidden_layers": 56, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.41.2", + "use_cache": true, + "vocab_size": 32768 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e8111d86778fdd940e8eea0d504bfb052fafd5af --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.41.2" +} diff --git a/model-00001-of-00059.safetensors b/model-00001-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99a3a4987d982aaa61007928b936ba1bb2f88f54 --- /dev/null +++ b/model-00001-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa0c5b865b3efb881287cd5786dcd01bfa43b9a4505fff4d5eddc844742260ea +size 4806774160 diff --git a/model-00002-of-00059.safetensors b/model-00002-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0185082a2ca2413dbd514ffac11c46bf01864241 --- /dev/null +++ b/model-00002-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4713515299288ae29e8914468af16bbc0fc1e379b598dc7568ab038682190de9 +size 4806799120 diff --git a/model-00003-of-00059.safetensors b/model-00003-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c3dd82d853c9e0bb47dbb005644eafb46c124d2 --- /dev/null +++ b/model-00003-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d75868cce7f52815ef90d40a4f12463d065baa1277a6e8e3b75ad6bc51f93417 +size 4806799120 diff --git a/model-00004-of-00059.safetensors b/model-00004-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..846596d0ca220cf4b747ddf15aa2040ac15224c5 --- /dev/null +++ b/model-00004-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d5c4a2d8b5f2fd1450b9f34de57d85b0c6f65225e9cd4cefc925b79dcebba2b +size 4806799120 diff --git a/model-00005-of-00059.safetensors b/model-00005-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1854a276d7070a50b0efe809e09b3bdfa728f922 --- /dev/null +++ b/model-00005-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec6210a78c9d4d2585905093f7c52f4c75fd128e58b8b13830e2001833f65b73 +size 4806799120 diff --git a/model-00006-of-00059.safetensors b/model-00006-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..828ee7e6864161855e09eca8d432451434910132 --- /dev/null +++ b/model-00006-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2865d6d61f421bedf67316a1d26a498cd119793ec609e0cf0e9d324eb6e855ce +size 4806799120 diff --git a/model-00007-of-00059.safetensors b/model-00007-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d24923d8e68205ab35a1fc5447ab9b709d64f59 --- /dev/null +++ b/model-00007-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd85ab9d543f649ee82d3d85f63e13ab1e6dc2e77af09e28f0802529b15e6b13 +size 4806799120 diff --git a/model-00008-of-00059.safetensors b/model-00008-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6fe5d28179944eae6facd92ef68c672dd2029c4 --- /dev/null +++ b/model-00008-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14e416e2045cd89f448f28dbddfdf01b8e6f54cda53248879a573d99c52d1747 +size 4806799120 diff --git a/model-00009-of-00059.safetensors b/model-00009-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33e31ad45ea265be33700ec43a30db73d267518b --- /dev/null +++ b/model-00009-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c34b877f84b4d0978ea8bda19f02911e7bd52eb127f18f9de88067ab218413b +size 4806799120 diff --git a/model-00010-of-00059.safetensors b/model-00010-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..140877bdfacea75611eb455d0a6630ae28b46333 --- /dev/null +++ b/model-00010-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dba019699b472591acb3829340beee2a47e1c7ee3a38f2a789ec12dfab6cc493 +size 4806799120 diff --git a/model-00011-of-00059.safetensors b/model-00011-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f380e1dfd1478903bff5ea24efdef429370fd6e --- /dev/null +++ b/model-00011-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ebf38855704daa2c8baf236db66bf9d6577d2f9f7142fd4293504eb8cdc6558 +size 4806799136 diff --git a/model-00012-of-00059.safetensors b/model-00012-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b303e64a7c0b5687f654174c5f64603cd3326260 --- /dev/null +++ b/model-00012-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8f64a66aa2eda0fc02b6d840cc4d48fc75571cb48793c15eec3adfdaa0bba27 +size 4806799152 diff --git a/model-00013-of-00059.safetensors b/model-00013-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3637d6436512eeb7decaf3c2535d90099f0f7701 --- /dev/null +++ b/model-00013-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f72363748b0cfcb795b1e2a3ba5af96f0a07cd354a53202ae4a08dae6661f587 +size 4806799152 diff --git a/model-00014-of-00059.safetensors b/model-00014-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61f2daf7182a6ec5b7a3abcbaddfdf3b5e3aa818 --- /dev/null +++ b/model-00014-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71eaa3055182f0296e31bff971c8d494beacdfa0b9ac53b7c772e59aefeb434 +size 4806799152 diff --git a/model-00015-of-00059.safetensors b/model-00015-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ed8b5e9364bd409048ce358edcb31e08fb396b0 --- /dev/null +++ b/model-00015-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9657da97604c90b4b175ef09f0c33ee81032142b24b3d1442399d74d9b976988 +size 4806799152 diff --git a/model-00016-of-00059.safetensors b/model-00016-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c557c19646c5c0a7048d9545714832c9bfe4b78 --- /dev/null +++ b/model-00016-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4cf455223e4b59b684b208a082e929c6a3ba7399a1f85b659bff36e5c41fb65 +size 4806799152 diff --git a/model-00017-of-00059.safetensors b/model-00017-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1830b3fa7b47d194d4d20ec0f9ed5e666a7c382b --- /dev/null +++ b/model-00017-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83caeb300f9f9d6fad333410f565ddd229cf7e00161ea77a73e560ae6f7307b8 +size 4806799152 diff --git a/model-00018-of-00059.safetensors b/model-00018-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a010e0aaad2a9b70ed6977ca3194c3a5f4e48c2 --- /dev/null +++ b/model-00018-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f537ad9339f8250dc466290971e7e9c382a99fe65eef2e0373fbda5fd6889604 +size 4806799152 diff --git a/model-00019-of-00059.safetensors b/model-00019-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..65b89eec5b96f84c4571a69664fe40c4bd69e194 --- /dev/null +++ b/model-00019-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59383beb53781eed58615f1e3309a692d88071e32854d34a9277b39ea1b56446 +size 4806799152 diff --git a/model-00020-of-00059.safetensors b/model-00020-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d8b87829a6c1ecd60d2a7165fc95637ec8dec242 --- /dev/null +++ b/model-00020-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fbae147434ee2e48024e3e17b4e2d9b136c0fa1b81c26dc7feb9f4eca0db1ad +size 4806799152 diff --git a/model-00021-of-00059.safetensors b/model-00021-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1bbc8683248b7b5196c6f68f3f70680b73336d07 --- /dev/null +++ b/model-00021-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1aa0435f8f282421995c3a08acb057faec011050e17a7fd65775c47a94e4c7a +size 4806799152 diff --git a/model-00022-of-00059.safetensors b/model-00022-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..894aea59c1bd3e030f4da3a01c60448115c122f6 --- /dev/null +++ b/model-00022-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc1ad1fb70bf1fc3e94d33bf2cb39c3d2506a1489af9613a6d2f1e2adff19e97 +size 4806799152 diff --git a/model-00023-of-00059.safetensors b/model-00023-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8eb294044a4b754eb9d783863d76c41b64b1b63e --- /dev/null +++ b/model-00023-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff61fcd1549dac6759911ebc928bae7025a77142033b4170a75a273c7a04e26f +size 4932529864 diff --git a/model-00024-of-00059.safetensors b/model-00024-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cee67b85e9d4a524b8c17808323898521deaa5c --- /dev/null +++ b/model-00024-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85616966020abbdaa65b4931016bc8127dc571c3aee26c5b87ba9b0f4a03a65c +size 4995542848 diff --git a/model-00025-of-00059.safetensors b/model-00025-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59dcfa71f8dea784280d53b948436f180c5761b8 --- /dev/null +++ b/model-00025-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d72e96e5abc5809743c52d7cc4da1c4ee0cea03037893b780ec2660885c291d +size 4995542848 diff --git a/model-00026-of-00059.safetensors b/model-00026-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7ec97b83b20ea5eceaf6e2523828980151190d96 --- /dev/null +++ b/model-00026-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:292782929156f892b187a3d344c6d912be827bb506c6d29eee384a1f69dbb113 +size 4932628288 diff --git a/model-00027-of-00059.safetensors b/model-00027-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0fdc3f9ebad03d44aeaf84719946cd0301d2303 --- /dev/null +++ b/model-00027-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d31c4d6971ad2d7e1df9b4637d164b21500869e8d95cc93f868b70b1a435c5a7 +size 4806774344 diff --git a/model-00028-of-00059.safetensors b/model-00028-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1599b692b9ffe9763225df6fd12e33975193b40c --- /dev/null +++ b/model-00028-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccefc4df39daefe49f0d94190c5fa9788e911598e513340dc41dc1b3e619ce94 +size 4806799144 diff --git a/model-00029-of-00059.safetensors b/model-00029-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8192cf2944cef50623e2021d18104877358f2333 --- /dev/null +++ b/model-00029-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ce44e1c095718d239ff55b49c162a45a6281af66af62b70e67438e81111fb36 +size 4806799144 diff --git a/model-00030-of-00059.safetensors b/model-00030-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6dd350f19e584f7e24786a30ab889457b9750a45 --- /dev/null +++ b/model-00030-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f2a95358e0f1489d4afc571192c914e9ef33cd6ff95d93b1b7b1d232dbcc11f +size 4806799144 diff --git a/model-00031-of-00059.safetensors b/model-00031-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..852551f39f3199c0955b73c5d105895b981c4cf4 --- /dev/null +++ b/model-00031-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae623beb60e9a57fd1d92c7aeb688149fa60a3e40d72780c94b15c1e5124380f +size 4806799144 diff --git a/model-00032-of-00059.safetensors b/model-00032-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..856aa99e476d35f91c79ccb5c89273d0efc8040e --- /dev/null +++ b/model-00032-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd91c7d51285d397680db02a9dfc8cf5f3260e0c079bfbda8d1d7b524ec782ab +size 4806799152 diff --git a/model-00033-of-00059.safetensors b/model-00033-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c662133ad386c9fbd797e140d996fdefeda51a3c --- /dev/null +++ b/model-00033-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dceab36a2754700f2c32b56d3d0909880dfadb92b0b1690d33875a2ae69fb8c +size 4806799152 diff --git a/model-00034-of-00059.safetensors b/model-00034-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f15584469593f428e1f4f1c93c18ad71aa46a7c7 --- /dev/null +++ b/model-00034-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bf855208d0f0417b3eacf2bf714d11b2df8b06b6a7410f339744c98c0e7826e +size 4806799152 diff --git a/model-00035-of-00059.safetensors b/model-00035-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1fb1f09acf9cfc50de8242ca530e4af6f63e53f6 --- /dev/null +++ b/model-00035-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3813117440845858bd96e57afed0c98824359b51816184871b480c2c82823b53 +size 4806799152 diff --git a/model-00036-of-00059.safetensors b/model-00036-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76c10bb154ac065b008fecbc43aea5f9c825bf5d --- /dev/null +++ b/model-00036-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a565c100d3a972852db88a38a76667c92a329ee5876d9a2b47f1076629075ad +size 4806799152 diff --git a/model-00037-of-00059.safetensors b/model-00037-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f55a6e1c54550e440e5fbdd9e08b09622bf552f0 --- /dev/null +++ b/model-00037-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06192f9c3045d6dad1a248d5bc3fbfe671e0a52d3fb94f660a354a15c4cfa456 +size 4806799152 diff --git a/model-00038-of-00059.safetensors b/model-00038-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..177f2daa76cfccca93c22438e8abc57b5b63b842 --- /dev/null +++ b/model-00038-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:786ad10f10e1cb5fb146ee12dacda3518a85005f92e06d2a32e539c56229a08c +size 4806799152 diff --git a/model-00039-of-00059.safetensors b/model-00039-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6268bfa42b12a07252b72a2b6cd0ba3c438b7f2 --- /dev/null +++ b/model-00039-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c60f40d4b61eb80f6b9f543032c7eeb8a712189acaec4a818c30d26a33d8c60 +size 4806799152 diff --git a/model-00040-of-00059.safetensors b/model-00040-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95e4b93cbd7cd80055aa972043f6877b05598b5b --- /dev/null +++ b/model-00040-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:423640868ac0a733f74d596aee0617114dc12c77ec1687d0ff686a75efee7b06 +size 4806799152 diff --git a/model-00041-of-00059.safetensors b/model-00041-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cffb80e70f2e4157a2a38746511628776c634ad --- /dev/null +++ b/model-00041-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e991ac536f01a45664fef49c16166c65836a1e940ed3da78c2eee7ffc6b54994 +size 4806799152 diff --git a/model-00042-of-00059.safetensors b/model-00042-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56505d079fe2d385dbc9c78e5d0d8c5848816ff5 --- /dev/null +++ b/model-00042-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7e3cab4613bee0e05448896e72a9b59db294b74609aea017a09ce22ce6da3e1 +size 4806799152 diff --git a/model-00043-of-00059.safetensors b/model-00043-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14efa8c0ddf44d40780e5b112fe962a02aaeef62 --- /dev/null +++ b/model-00043-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a957f38b7fc8b79264f891eaa74d539f12991979194472d61fc1387adbe31dba +size 4806799152 diff --git a/model-00044-of-00059.safetensors b/model-00044-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..986c830bf626a538da633694d7ef08b164810a84 --- /dev/null +++ b/model-00044-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1286cc5aa66f39476c621fbcdb62c652636f432da072a83550d3b82360a5c13 +size 4806799152 diff --git a/model-00045-of-00059.safetensors b/model-00045-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..375ca257a255b6da50f66cb35a497a5cc76fb73a --- /dev/null +++ b/model-00045-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffb6376020a3da7b6ea413f2cdf2e411b04ae3c7c38e63ce3cfe2bbf0c9f86f4 +size 4806799152 diff --git a/model-00046-of-00059.safetensors b/model-00046-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..39328fd22ff9d560012dc5824fe6930c7114c933 --- /dev/null +++ b/model-00046-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1743e4569a5e47f57cb0bceffbb8fff92ffc176139e10fa6ccd415e89f558dad +size 4806799152 diff --git a/model-00047-of-00059.safetensors b/model-00047-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2c674f2d8d18b89fb2902bf1db973d149b6c680 --- /dev/null +++ b/model-00047-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43280533b6ef99a57b29fb93497b2640fc331e8bcd67116bf60f358df6e3d482 +size 4806799152 diff --git a/model-00048-of-00059.safetensors b/model-00048-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f720f51087ffd66f62d4cb859bbfa01df3acf626 --- /dev/null +++ b/model-00048-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5905cbab9d3454f3a199fc5b1f8db4b032a2667445bc79c62d0f877001bd3781 +size 4806799152 diff --git a/model-00049-of-00059.safetensors b/model-00049-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d916b0dd2dd8b0882a5cfd9093c308dd88fc0cfc --- /dev/null +++ b/model-00049-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a690ce1965991c615e4680b73f706b1bb608938b09e66a6d9315162d4e4f4f9 +size 4806799152 diff --git a/model-00050-of-00059.safetensors b/model-00050-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2aacf9ebadf2e38bacf05c9ae3a09a529a16b2b --- /dev/null +++ b/model-00050-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d67db9825f3d06e63ae3a40dade2d6d574414baa8c74860940a6761b6250b3 +size 4806799152 diff --git a/model-00051-of-00059.safetensors b/model-00051-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9d4ed2bae657b2139add73cb50847c16fdd38c6 --- /dev/null +++ b/model-00051-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:251c6f3e0231dd8cce9b9d82dd57d8c15616bbbccdf1decfad1a94a34e11bc84 +size 4932529864 diff --git a/model-00052-of-00059.safetensors b/model-00052-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c64e3e44442d05221948fae4cc94ff8cab020ce2 --- /dev/null +++ b/model-00052-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f8693c79ce7a5cf34595995df98e8c87d56b3317df45db583546f82935ee49e +size 4995542848 diff --git a/model-00053-of-00059.safetensors b/model-00053-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc55c430255dc3f22961d82f51a335d35a3c5f7c --- /dev/null +++ b/model-00053-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d6633923b8db03880edb6f0796ac5a8e5ccd8ec2ffbc49fb63a723f3243b08d +size 4995542848 diff --git a/model-00054-of-00059.safetensors b/model-00054-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9417bdf074e49638a5bfc15c18e06789029e433a --- /dev/null +++ b/model-00054-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:681188aef0582c7aa1e525d7c966f8af263bbeae3845df04c17c7c71c8da3a22 +size 4932628288 diff --git a/model-00055-of-00059.safetensors b/model-00055-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b2ff56e8267da8e40415308c6611072e425b5be8 --- /dev/null +++ b/model-00055-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb74f5a1dfcd6e176e8922205fd55d0a27f8843cc79767b168d8b7f47eb4eef +size 4806774344 diff --git a/model-00056-of-00059.safetensors b/model-00056-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd22b1879509d0934f572b68a935b82cf2e58396 --- /dev/null +++ b/model-00056-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65557bece2f6934bd26a23c36039d43b8714a094c3eea3188fed2be7c45e734 +size 4806799144 diff --git a/model-00057-of-00059.safetensors b/model-00057-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad3644f0eb6dc3ea20d75580ef9e9433edacee7f --- /dev/null +++ b/model-00057-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d66b6771ab5282691341fd564aa89b3d0cef6518dead734c8190477b3da2554 +size 4806799144 diff --git a/model-00058-of-00059.safetensors b/model-00058-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6501b237e5b76ed23e0a0372a4ec60ef89cc7e0 --- /dev/null +++ b/model-00058-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9b5e0888fb994fc260a3e8bd0e399a28c3426ab5afa5d1e860163d6905191e8 +size 4806799144 diff --git a/model-00059-of-00059.safetensors b/model-00059-of-00059.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d90d8fbeded5f6a6d9c3ef130eb9f5e1703fe1b1 --- /dev/null +++ b/model-00059-of-00059.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893a2a7dc11f2869f5b2dbe197c4692f4aa7a1888726f8c1a45ff8e8bc6b1faa +size 1207997392 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..e227b6ef31a4a52f46d86fbcf5d1ab6c295560fe --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1746 @@ +{ + "metadata": { + "total_size": 281260142592 + }, + "weight_map": { + "lm_head.weight": "model-00059-of-00059.safetensors", + "model.embed_tokens.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00059.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00059.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00059.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00059.safetensors", + "model.layers.1.input_layernorm.weight": "model-00003-of-00059.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00059.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00011-of-00059.safetensors", + "model.layers.10.input_layernorm.weight": "model-00012-of-00059.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00012-of-00059.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00011-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00012-of-00059.safetensors", + "model.layers.11.input_layernorm.weight": "model-00013-of-00059.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00013-of-00059.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00012-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00013-of-00059.safetensors", + "model.layers.12.input_layernorm.weight": "model-00014-of-00059.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00014-of-00059.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00013-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00014-of-00059.safetensors", + "model.layers.13.input_layernorm.weight": "model-00015-of-00059.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00015-of-00059.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00014-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00015-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00015-of-00059.safetensors", + "model.layers.14.input_layernorm.weight": "model-00016-of-00059.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00016-of-00059.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00015-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00016-of-00059.safetensors", + "model.layers.15.input_layernorm.weight": "model-00017-of-00059.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00017-of-00059.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00016-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00017-of-00059.safetensors", + "model.layers.16.input_layernorm.weight": "model-00018-of-00059.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00018-of-00059.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00017-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00018-of-00059.safetensors", + "model.layers.17.input_layernorm.weight": "model-00019-of-00059.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00019-of-00059.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00018-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00019-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00019-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00019-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00059.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00019-of-00059.safetensors", + "model.layers.18.input_layernorm.weight": "model-00020-of-00059.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00020-of-00059.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00019-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00059.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00020-of-00059.safetensors", + "model.layers.19.input_layernorm.weight": "model-00021-of-00059.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00021-of-00059.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00020-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00003-of-00059.safetensors", + "model.layers.2.input_layernorm.weight": "model-00004-of-00059.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00004-of-00059.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00059.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00021-of-00059.safetensors", + "model.layers.20.input_layernorm.weight": "model-00022-of-00059.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00022-of-00059.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00021-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00059.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00022-of-00059.safetensors", + "model.layers.21.input_layernorm.weight": "model-00023-of-00059.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00023-of-00059.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00022-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00059.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00024-of-00059.safetensors", + "model.layers.22.input_layernorm.weight": "model-00024-of-00059.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00024-of-00059.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00023-of-00059.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00024-of-00059.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00023-of-00059.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00023-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00059.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00025-of-00059.safetensors", + "model.layers.23.input_layernorm.weight": "model-00025-of-00059.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00025-of-00059.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00024-of-00059.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00025-of-00059.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00024-of-00059.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00025-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00059.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00026-of-00059.safetensors", + "model.layers.24.input_layernorm.weight": "model-00026-of-00059.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00026-of-00059.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00026-of-00059.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00026-of-00059.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00025-of-00059.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00026-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00059.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00027-of-00059.safetensors", + "model.layers.25.input_layernorm.weight": "model-00028-of-00059.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00028-of-00059.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00027-of-00059.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00027-of-00059.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00027-of-00059.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00027-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00028-of-00059.safetensors", + "model.layers.26.input_layernorm.weight": "model-00029-of-00059.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00029-of-00059.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00028-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00029-of-00059.safetensors", + "model.layers.27.input_layernorm.weight": "model-00030-of-00059.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00030-of-00059.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00029-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00030-of-00059.safetensors", + "model.layers.28.input_layernorm.weight": "model-00031-of-00059.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00031-of-00059.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00030-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00031-of-00059.safetensors", + "model.layers.29.input_layernorm.weight": "model-00032-of-00059.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00032-of-00059.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00031-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00004-of-00059.safetensors", + "model.layers.3.input_layernorm.weight": "model-00005-of-00059.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00005-of-00059.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00032-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00032-of-00059.safetensors", + "model.layers.30.input_layernorm.weight": "model-00033-of-00059.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00033-of-00059.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00032-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00033-of-00059.safetensors", + "model.layers.31.input_layernorm.weight": "model-00034-of-00059.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00034-of-00059.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00033-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00034-of-00059.safetensors", + "model.layers.32.input_layernorm.weight": "model-00035-of-00059.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00035-of-00059.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00034-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00035-of-00059.safetensors", + "model.layers.33.input_layernorm.weight": "model-00036-of-00059.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00036-of-00059.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00035-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00036-of-00059.safetensors", + "model.layers.34.input_layernorm.weight": "model-00037-of-00059.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00037-of-00059.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00036-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.35.block_sparse_moe.gate.weight": "model-00037-of-00059.safetensors", + "model.layers.35.input_layernorm.weight": "model-00038-of-00059.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00038-of-00059.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00037-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.36.block_sparse_moe.gate.weight": "model-00038-of-00059.safetensors", + "model.layers.36.input_layernorm.weight": "model-00039-of-00059.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00039-of-00059.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00038-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.37.block_sparse_moe.gate.weight": "model-00039-of-00059.safetensors", + "model.layers.37.input_layernorm.weight": "model-00040-of-00059.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00040-of-00059.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00039-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00040-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.38.block_sparse_moe.gate.weight": "model-00040-of-00059.safetensors", + "model.layers.38.input_layernorm.weight": "model-00041-of-00059.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00041-of-00059.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00040-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.39.block_sparse_moe.gate.weight": "model-00041-of-00059.safetensors", + "model.layers.39.input_layernorm.weight": "model-00042-of-00059.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00042-of-00059.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00041-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00005-of-00059.safetensors", + "model.layers.4.input_layernorm.weight": "model-00006-of-00059.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00006-of-00059.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00005-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.40.block_sparse_moe.gate.weight": "model-00042-of-00059.safetensors", + "model.layers.40.input_layernorm.weight": "model-00043-of-00059.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00043-of-00059.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00042-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w1.weight": "model-00043-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.41.block_sparse_moe.gate.weight": "model-00043-of-00059.safetensors", + "model.layers.41.input_layernorm.weight": "model-00044-of-00059.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00044-of-00059.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00043-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.42.block_sparse_moe.gate.weight": "model-00044-of-00059.safetensors", + "model.layers.42.input_layernorm.weight": "model-00045-of-00059.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00045-of-00059.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00044-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.43.block_sparse_moe.gate.weight": "model-00045-of-00059.safetensors", + "model.layers.43.input_layernorm.weight": "model-00046-of-00059.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00046-of-00059.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00045-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.44.block_sparse_moe.gate.weight": "model-00046-of-00059.safetensors", + "model.layers.44.input_layernorm.weight": "model-00047-of-00059.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00047-of-00059.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00046-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00059.safetensors", + "model.layers.45.block_sparse_moe.gate.weight": "model-00047-of-00059.safetensors", + "model.layers.45.input_layernorm.weight": "model-00048-of-00059.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00048-of-00059.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00047-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00048-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00048-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00059.safetensors", + "model.layers.46.block_sparse_moe.gate.weight": "model-00048-of-00059.safetensors", + "model.layers.46.input_layernorm.weight": "model-00049-of-00059.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00049-of-00059.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00048-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w1.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w2.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w3.weight": "model-00050-of-00059.safetensors", + "model.layers.47.block_sparse_moe.gate.weight": "model-00049-of-00059.safetensors", + "model.layers.47.input_layernorm.weight": "model-00050-of-00059.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00050-of-00059.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00049-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w1.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00059.safetensors", + "model.layers.48.block_sparse_moe.gate.weight": "model-00050-of-00059.safetensors", + "model.layers.48.input_layernorm.weight": "model-00051-of-00059.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00051-of-00059.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00050-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w1.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w2.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w3.weight": "model-00052-of-00059.safetensors", + "model.layers.49.block_sparse_moe.gate.weight": "model-00052-of-00059.safetensors", + "model.layers.49.input_layernorm.weight": "model-00052-of-00059.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00052-of-00059.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00051-of-00059.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00052-of-00059.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00051-of-00059.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00051-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00006-of-00059.safetensors", + "model.layers.5.input_layernorm.weight": "model-00007-of-00059.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00007-of-00059.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00006-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w1.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w2.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w3.weight": "model-00053-of-00059.safetensors", + "model.layers.50.block_sparse_moe.gate.weight": "model-00053-of-00059.safetensors", + "model.layers.50.input_layernorm.weight": "model-00053-of-00059.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00053-of-00059.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00052-of-00059.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00053-of-00059.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00052-of-00059.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00053-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w1.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w2.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w3.weight": "model-00054-of-00059.safetensors", + "model.layers.51.block_sparse_moe.gate.weight": "model-00054-of-00059.safetensors", + "model.layers.51.input_layernorm.weight": "model-00054-of-00059.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00054-of-00059.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00054-of-00059.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00054-of-00059.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00053-of-00059.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00054-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w3.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w1.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w2.weight": "model-00055-of-00059.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.52.block_sparse_moe.gate.weight": "model-00055-of-00059.safetensors", + "model.layers.52.input_layernorm.weight": "model-00056-of-00059.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00056-of-00059.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00055-of-00059.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00055-of-00059.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00055-of-00059.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00055-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w2.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w3.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w1.weight": "model-00056-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.53.block_sparse_moe.gate.weight": "model-00056-of-00059.safetensors", + "model.layers.53.input_layernorm.weight": "model-00057-of-00059.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00057-of-00059.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00056-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w1.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w2.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w3.weight": "model-00057-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.54.block_sparse_moe.gate.weight": "model-00057-of-00059.safetensors", + "model.layers.54.input_layernorm.weight": "model-00058-of-00059.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00058-of-00059.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00057-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w1.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w2.weight": "model-00058-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w3.weight": "model-00059-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w1.weight": "model-00059-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w2.weight": "model-00059-of-00059.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w3.weight": "model-00059-of-00059.safetensors", + "model.layers.55.block_sparse_moe.gate.weight": "model-00058-of-00059.safetensors", + "model.layers.55.input_layernorm.weight": "model-00059-of-00059.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00059-of-00059.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00058-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00007-of-00059.safetensors", + "model.layers.6.input_layernorm.weight": "model-00008-of-00059.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00008-of-00059.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00007-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00008-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00008-of-00059.safetensors", + "model.layers.7.input_layernorm.weight": "model-00009-of-00059.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00009-of-00059.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00008-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00009-of-00059.safetensors", + "model.layers.8.input_layernorm.weight": "model-00010-of-00059.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00010-of-00059.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00009-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00059.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00010-of-00059.safetensors", + "model.layers.9.input_layernorm.weight": "model-00011-of-00059.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00011-of-00059.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00010-of-00059.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00010-of-00059.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00010-of-00059.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00010-of-00059.safetensors", + "model.norm.weight": "model-00059-of-00059.safetensors" + } +}