diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,13367 @@ +{ + "metadata": { + "ParamSize": 885, + "ParamBytes": 40900313088.0, + "BitsPerParam": 4.157646319274502 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 622854144, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 622854144, + "byteOffset": 0 + } + ], + "md5sum": "2f8a3672ab7cc8e433de36e1f192d3ee" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 77856768, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 77856768, + "byteOffset": 0 + } + ], + "md5sum": "77b57102e7539c441b60cf59d2514e20" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.79.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "d64e72517cc4fb5a743b81e596f3498a" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "cb968c086d88e025ee1302911ba24066" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.79.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "7f82797d4537782d5a4377fbaa09eb57" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 622854144, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 622854144, + "byteOffset": 0 + } + ], + "md5sum": "53e3d0004a69bffaa8da854cf98f9977" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 77856768, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 77856768, + "byteOffset": 0 + } + ], + "md5sum": "831e09c62211879b07e3d618c0073aa7" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "91b2c9af1d2ebc69656227f5958c82d5" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "415dbfcfad792f80410ac065e9d9752b" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "82962f312b607593130f16bf7d8601fb" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "94c947dfbe652f98a5422307cfdfe563" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f23959aab7852eaa57579ca91f8bb28c" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 24662016, + "records": [ + { + "name": "model.layers.79.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 0 + }, + { + "name": "model.layers.79.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 16384 + }, + { + "name": "model.layers.79.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15155200 + }, + { + "name": "model.norm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15171584 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15187968 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15208448 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20451328 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24645632 + } + ], + "md5sum": "26b224dc22b53dcf13fefdda046b0d09" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "5c4610d182bcb4f23691c1e03a3fd446" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "22e57c1acdf163f7fee7067e2b82c50a" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "c57cd52fe46900e38d1f61a151e5ffde" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b6b8e3a33346ebe077fd0f2b9543888b" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 30347264, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15155200 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 15171584 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30310400 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 30326784 + } + ], + "md5sum": "5676a9f745d5d86f1a05e0b095fbea48" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a082bbb7d0ebc82ba407c5f35317b089" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "08f1e8ad8df5f8243107f7e14a6c2497" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "1436f3f47bbfe31e17c0dd6dd7381954" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "dd4ac404e29cf5cb5dbe9a03f4ca696d" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "96058e3cbcf026e5fdb367d597dbb8ae" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6509f402d96bedda6c9538337c0f587c" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29872128, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 9453568 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24592384 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24608768 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24629248 + } + ], + "md5sum": "74edbb75879b061830bf9fe542b049b0" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0cbb40adf30e066aaa0917b418af5ac1" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "5c417fa53ba369072beab5f8756e1161" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "93d183986cf3f68c4a6d7a9e73491abd" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7339dcc57269f056653ce2bc0f9dfac2" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "81eb73d8f422861928ccd028e0110647" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0de082783c7f3b746ca03cd8605215ae" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 28839936, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 19365888 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 19386368 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24629248 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28823552 + } + ], + "md5sum": "324946fddfa35486688c1191064ef58d" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b9c07636ded1a629c06114b4d4c19697" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ba893847cd7e01b271df4fc63dae3b71" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "04eb1cfe7aea17ccced76bbd52872f0b" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a965086a064b0032b0d5778bbabdaa93" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "6b432a0a6ebda9244163c2780e237758" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "636a6d6585f9d0ed311eccd676519f6b" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e2f074b04163d1a4918f438b00c6ee0d" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a2e607b3088ac83176b3a29e0fb3a66b" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5750423e6464274708776aff0a3103c0" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "14553b18d62011142afee8a2cdabf638" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 30347264, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15155200 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 15171584 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 30310400 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 30326784 + } + ], + "md5sum": "af3d1e8ff1e9ed6658b123843d7c3530" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a174af48c1399b495282a37143795fd6" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "900e6e24dba2d9582e2a450a266ae8ce" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "0a596a1d79708c68cc3da9ac8e393cb4" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "86c3000637673bd08769ed27a27a58b8" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b70a7fcdc7bc5db15ea0e23b3a2c9799" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "79c5e5e0096ee3b2226ce143509b044a" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 29872128, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 5242880 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 9437184 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 9453568 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24592384 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24608768 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24629248 + } + ], + "md5sum": "f002da3e36563b8c26fc798eaf989d01" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b00e9cc27bd5bd81a48f1f4262b4f3eb" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b0fc9dc9365ae20fc8d1f38e0370f2b6" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "4a9a41f32de238d5a4390294113fd2b8" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "069c10167ff97ad4c16f73f744f7407f" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "922a22388db55893b949127e0a8d69ab" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "95de3d8cbbe343f146efd48ae9529283" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 28839936, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 4194304 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 4214784 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 9457664 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 13651968 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 13668352 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28807168 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28823552 + } + ], + "md5sum": "6cd2786b43aef81bb7687fdb40b8e195" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "160fb95202e61526f8af0dbf5a1c5259" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0c92c73adc24c9232147391f9c7bb64c" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "43de349f22f9ca1cf129653a9ac1bbfb" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b516a5f51df4797e367cb205285e5f3e" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "707762343c44569703b9b96fdfc1d9a0" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "b648dea97a8aa7742972c0afeeb54f31" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9cc57f34aa47cfee623d6cd74cc72ace" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "234692c7e226f0d83e22177d40796ef3" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "fc8a0d25e7e4f1c3db46b03e35e067ac" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e5a2055170e6cb53f2f2ed8f94f108fb" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "db61e774b8e45b365e9114121cd7e639" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "1cb470986437a7394f9f3b7e4b8fd0f3" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b08888b5ebb66c833f0922f976ec99c1" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f34a86d8e2e8ae37f2789f5132429821" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1c47d42787be4eeb6a88091b2b8edb78" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1e2e149cb04712c973e8a7030fdacd10" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d9522a6f6bc4599e64123f92d9c76b46" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f6bf14900cba477183e6362eb4b44482" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "17c9a8ab9b73363a92bebbf3aba9b8fd" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e474ce06e13eb4bd86fbc4349720cb9d" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "d27d07d2eb44b0248b17d58b9196d6c7" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "44e502cfa2cc86ffe3176cca07ede577" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "15066a291ea256f413f112eecb24470e" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "4815b6bee858cb152eef66d6023f87f8" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "5effe98ba80e5f2ad8790c8416b85285" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5c3c039f0570562f774cbc192acc8e2e" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a2a0c39b13c75b917f8fcc518ad66903" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4e200618d7230aab50695f59436d06aa" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "db76503f52516822cf7694b14725a361" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "d4a81ebf5bad29e8df018a56d4454a4c" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "bb23fe109f2cca75b9b261f21acf1124" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ab32f0a1a367bf129872e0b3551272ac" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "70762066f1d254a28b924364e6061d5b" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "85a6a80daeef1854eddc5a086d31ccf8" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "6543fe743be40bd2f8f1e5b74ccea0cb" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "9e7ee248b7f6a7d82caaef9d454a6336" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ceeefed13e1fa9c4633332d11a99b1e4" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "317308944e4ce8c8658e06a5267e4e85" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "31b06ce8189dd85fd384d2f91202d0c6" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "21a4409978eeaa414cc706240f487b80" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "45991d5abff3fe99b63b8345489bf982" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "a710cf29e1417261e420e3238ee608f1" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "1627283f532e5e4438d1518d3b668bd0" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "fc42c462299f903046e9c37571abf904" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e3e59a1a8dd37a903b2e2d24a8fc1e20" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9b410840b2ca4577065d573b526dabc5" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "b4e62b510a64e5cc8c658a6e9c9786a0" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "dd24c6b35b58168c667e37b243c616c3" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "67b9eb87b0b42cee0c1d7e0c22821721" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ef6b34f7d221210cd3f02c8ab0730944" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6b27592bb6069ea9afe161676b23ffbd" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "7d0bb379df610e9085e4303f323489fb" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ccc6e487585f0e1cc73e6d371d5b3f35" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "a8d8368365868e97e4bcba2b3581c676" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b59bf0f6c5cc5bf25929de8ab8dae09e" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "83dd56a77ea74a43ccbfb08203e90496" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "61f46a38a4274672b2e8129831c9f6bf" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d05e2452942cbab0e33463de5a462ffe" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "262154b988c12978297d3b255084fa14" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0f25cf6a1e95b4af9c512d0d46b23fe9" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3146803e0394abbcd3d3c6a5f27716c2" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "08cbcc00ce3b222eedf94dd1616f971e" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "ff76a45de5598c2a3da9f76804939512" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "865b2ccbb2afc24864ff16f19dbc9cb6" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "1c56aa5377761508e6625d5c3987c816" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "409e8ea170763ca15b6991257bad5f79" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b6b78d1786982dc295d4958887585325" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "63177d909543be674ea5bd441ae796dc" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7c3517bf93a052147411a17542eb8cf2" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "483c6ac829daeb1cc9740290ea2caa1a" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "3a25b6382926452683b8710e149bc07c" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8f5f36a8673eb5c04ee9d893066b10d8" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "09aa01dcadd2149b3c157f60990d288b" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1ffdb2155ef8d884cb03b30ff96e0784" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "dd64d7ef69ca1087ac094db738d7ae10" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "0e082228ab62be94797223446b19bedf" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7e81e0fd49e7cf2d48d526dbc5e56157" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "71d08916109ea623035f3813d5a19249" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "846f72a7e781c5d293aab94a6616bcce" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ed6f3da26f1691fff911cff68922ebff" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "676d5a05a99b24ad9c24ddc497c5cce5" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7cdb40cd40712e28dedceacd58fd9a26" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ec398f6181f3504eacc57231829b648b" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "b9c3ae1b5777c91bc46e2c9552857e7f" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a2b641c95496e33dced68d9bae239083" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "33ba89df549e891019627165a7406690" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "0ae1c06ed3e00ef04b84d8764daf1d7e" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "64b57a9e6f9a84f93a3861cc15b3c17f" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "881f9c4544e864135c756622004db44f" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "a322b1fc6065725c9195cc2d43f36805" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "257a277414497a1aa47bca6a4de908cd" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "1f28c4a7df95627a70caa0a9f7b7090a" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3a4a41d11285f38aca783d6ea60bd77d" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2a31c7bdab10e586a14d5aa21a48cddb" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "08481386b8ca5cdc916fb8264c13d8fd" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "f9d751c16a963a30b45c0e82f3886688" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d9172d9da418b683da29cf7d8246c0a7" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "fe63caf69dbd25b97db7044da811c8d0" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8aaee9efa2bb95ae096aa1ae2557d5d4" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "670d1a7243911831c8da8792b7f58afd" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c5c4e5451ed2357ad9c12280b10ae3fe" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "df9b0348572c7c545a666c39be16dab6" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "97d4e9c1b4b28f9f009be14986ffa050" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "49ce1ca41c447a823475ff1d84281de0" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "3dbb0c7cf4c59c125b6ff87a14de5b5d" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "59b3b39bc34e6de15ea586583b6df4d9" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "2de4e5abfb73fa4317810fc097a13c01" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "425f6d846b5e7f34e20a6512c5cf021d" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "3df370fed8f427c071d2193df99d577f" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 28839936, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 19365888 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 19386368 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 24629248 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 28823552 + } + ], + "md5sum": "4f445f04e568eed445b758734ed64d84" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "79afc5d95d734f5c0f0c2d9819c2b7a2" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2eabe83fd968e9753f7a1759484db121" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "a17563a1643861d80f5dad5b652f816a" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b14c749675d4c709b4d50a546bb49bed" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1eab69fe809f08dc60f3484ee130644e" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f3ace20daa413828687289e06b143fae" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "12662ec45c5172285c0ed5c772382ab2" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "444f0dd770fb6b0e90c9492ab1818144" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "366d22ce0877ab40260072f200c9e6e4" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "45b0ed7aabc0d217fdf57324e7655877" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "d2b2e98797c98c652a3366200c5478e0" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "bf6d38345d95620b824edcf9393eeb24" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6f1fd0fbde689acd57fb6042229864b2" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "d42e2bd89fea706ba7b91c6cce489b23" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "7ad48b43b9bbbe36363c108ee41a5714" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "46fc40297b6d4ab71a2982edb75fce29" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ecb5420b030b7c1f751c21803dac0855" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "1770b3c7a316e1ed18ef7b2583785a9c" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1f4654158d3c57f24335e4cbde42c584" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "523e927e9173ce3cbfe7cc45363a6536" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "35ffabae887de0b65986e30dd925b497" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "cadbf190f744621e094b012490d4f535" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "ed0d2a6e4eac2ad244e2757d7f0f49a9" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "5861fe8761258a73b11979e77758689b" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "980c3c941074bf6843a988e7669366a3" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "a0a256ae7b28c32fc7b094846db00f4a" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "06ff20586ca4a13c11e6bd3016010dd4" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "89e20162f4ddff2e21ac67d0ad13198e" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "4b1b9334987501cf099bd79c16c72372" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "cdb1d04bf0f465d41b16c3e52ab56e79" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "bd635827fd6f3f21c3889b2681f0bb2e" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ce705b4fdd23805f320cd6e923574c0c" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "322f2722fc8c9ab9d500a9eec83dcca0" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "66a5299225898109ed12e9e60b464e22" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "cc7ab9fdbb3728004dfad2a2f6da966c" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "5aed39d0d8de984ee0fe0a206acc64c7" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "5a20581b759430fde5014f442c75c3b5" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "6f395db9341c44ebf94604aace6fa67b" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "93e8373bbe22a61dbfcd5527bab490f2" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6d81616dfa031fffffd48d1eeac9fda4" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "636ff25ab2b4f4b7a2a430742bc1d434" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "c6eda978d4b415f4794105fd3eee294c" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "bc6fa509dea70cb5f4f0b407b7ea7141" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f39533cf489a5a6bf3873efb2757f5fd" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6daea4522638d34d9d97369ce7834a7d" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "aa9b621d25d6119973d5298f77658605" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "72c635a0155b71835e7014b246d6c874" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "673f008a4d702b07023402a19af37430" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "199970431f1e6195acfd867393a99429" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ddce19c530586e2a5b66986cb7a0d9d5" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "27e0a613addb76a2a0b51daebe9e10af" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e8de56ca0f9a1749e8ca9a5b546cb7d9" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f2fc20bd738baa4b510b23afd8b6da7f" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "ab94c6b935acec2623e745baabc8c352" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "dc5d1b41f82bd4193a8b5e727381ff20" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "793dbc6f9ff4f8eb8a227f624f6e4c34" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e91abaade25ee36ceb0fe6801db662c4" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a7179cdb2dc1496f733ac9010b8791aa" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "be958e4e81379813e4c003665b4a96c9" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bbeab937ed6f60b58a50c28cfea26205" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "957a94d31416d10ab00f10540960ac5a" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e4cd651210acd9d7ea44ac3645b30eff" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e4f0eeb4f3db416b37df2ce35457e330" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "5ca4cbf5c29ca7bd649a2a831778bd16" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b85e25b9b51c3abd2c65859dcb12a897" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "8829f387f8ed0c2dbf199df78636cc7b" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "9803153c05063000d600fd98a6c5652a" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "dcd7a766781cfed1f709b3560bd7aabd" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "1e8ae0b39dd207b3652bac6edb54b0e5" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "02a5ce870a5c0eaa0eaddde8d8d666a2" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "81e8203db5acb7a05c2c8598e0eba942" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "3a4410beb425eefc6bc0f79230fd966c" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8c7f34439c4076f3263cc696b8d08a6b" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d83ddeaf1e90b9ce0b7bf7331bbc3425" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "893845464acdfa2831edc56ef190327f" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "1c82a837c24e848566a3774455c57605" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "3d3a1c7c5b4078ed609bdfef0cb8db83" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "bb8de8d371ebc957dc666e38a689691b" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "39e40c8d348c61fc2d9e11d2453b7072" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "70531661185135dda2e4c6281071f975" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "5c6db7ac8d94f56ea816aded4a69c2e2" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4053a0167df5539983dc993125fd903b" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "cfe28f99950e1f9e6e9e7c0120ff220a" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "7304cddd6f2549b652f0386213e8db09" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "aa5c2563ab6ce326aaec34a4a197549e" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "458bbdc1ad40ae963093cc1b29e52627" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "0db0118e4ebb5c32d3a9944cde14ec31" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "12b99a402ad237a632f1f9a12b710561" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f13bf1a4a2c9842c65e13de494a26b4e" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "5cc6f2dcc30adeb1969beb3b0780c4b1" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "dbdf08b7058856d3835bd151e5155253" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e81e9fbd8da7d46d078aeac61a13f08c" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "9f4b43468dd8d21b4465cb89abe80347" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "887abe843d9458be0394331da9c8d363" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "a97c8a49e4eea4f7637354ca14848dd0" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "0b6418b770b711f41bbd5d8860499631" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2504681e588bddf9690d47bff04eecfb" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8fb2be7b9c99ef847f9c4cde6cd351a6" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c7e37bdb93e4df9fff275ca001bbf383" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "682d323f22a3a44d89c6abff0e4271cb" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2f1449c7d26995f48d7aa5a3dc8874bb" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "e268b0f0720703f2be2933761c4efe1a" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "ed325044293067e0da54d590dfef71a3" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "6472c6dc45a4a4009b868313243c72f0" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "82620ac454e23d382358e0012dea5af8" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4bf9c208f38fb7644525eef2f70edf6a" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7d3edcc22483c0ab68d0891e11e4558b" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "6dc7fb81ad883ff7f33c3cf1edca9d53" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "2b7492011d415eb2c9a68c45eb6b7a58" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e71f82b437fe24ef9a92741d693ae661" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "61d6ab534d8cdce587d2d0eaa61fd4e0" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a74570618926f159e9a14443f22acfbd" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "baafea3f032d484f3450150f623bcc03" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4947b2b1791f2e02eb5d73deb5f828b9" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "5456fb55aa5dcec34f22a739425f1bce" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "d9e98ccece990a9c60f135f1bfebfc3c" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "76e3dcc435ad3dc88b74b80fdf2d9fdd" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "412469dafed735aedf9d81e3137a5db8" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "e894ad96da0508ba5b375eaf3a725906" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "9a3dc9403fff17b23a265a0488a69969" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "dfd70d19b6efaa9f6cbc3d6cbab3586d" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f1819346d582a27ab453ab2345d5dbfe" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cc84f05f914f28e731b11a2d1857efc6" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8669229944f212af6c7a57e686292286" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "4a8e36ecc6a48a4a28dcb014422e7b41" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "58ac484f6067968fb533b8ac4b6dacdf" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "35442fd12568fff47f66d72deeac0b83" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4089828c05082fbb0b4e9b94abb92827" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "004ab15a7d2d290a1338be93f720aa35" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c1ea99f205341ed4f9539150338c8205" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6d2e1b7c1007e9cdd301493b0c6e8114" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "dca08ed81a07a9eb70c92e0610b94da1" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "abf02ad634bfca0e86d3c8c807dbaa40" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "c7d9a7c88b41cc21a4be21faa8d90399" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "08dd3bbf936fa5fdd8994d01ec89d46a" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "d5be546346ebda8dfddbec2d38631f94" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "98823b8cd5acef1cf2835e3a9b2ce819" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "082faef96eb7f1d2eeb49b1440161344" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "39b61062275673bfbe3b371ec02147ed" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "b0c76d60374188cd77c012307b5609aa" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "dec628d72f68852ce448c8071337a791" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "59d5bf25442c2b87957656d46e6143b9" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "16095e4ef8bc916b6df464fc8bc21bec" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b04bd6b8edd9bb4ec2d87b9c28dc2f38" + }, + { + "dataPath": "params_shard_311.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "8237cfb3fd6ed0ddf37ddfef6336bc47" + }, + { + "dataPath": "params_shard_312.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "8ac306ffb975b890336e6b31360d232c" + }, + { + "dataPath": "params_shard_313.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6a71b1c3f101a07062e1a026648e5f8b" + }, + { + "dataPath": "params_shard_314.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "2f87fa83db49bb2daed18f4e20a6e6b1" + }, + { + "dataPath": "params_shard_315.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "99e7ff21a160c7396f8f3f49097d5bb2" + }, + { + "dataPath": "params_shard_316.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "01e6c6ae5be10511d7ae836f4b823601" + }, + { + "dataPath": "params_shard_317.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "57ce8e27766e49e49a3f756cafccdb80" + }, + { + "dataPath": "params_shard_318.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "3b0a67b627f41b636204a109cd71af52" + }, + { + "dataPath": "params_shard_319.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e1675b5669e3ca2e62c862ea18c51df1" + }, + { + "dataPath": "params_shard_320.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "4e4670545c5b515ce27447afe50633fe" + }, + { + "dataPath": "params_shard_321.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "af4c9afbe8f0bee1d8dcb495e9c3d741" + }, + { + "dataPath": "params_shard_322.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "13623aca9721aaa537c7fdb06f223484" + }, + { + "dataPath": "params_shard_323.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "6675fac4186f6db14053b3e09787eff8" + }, + { + "dataPath": "params_shard_324.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "612c1082add037fa555cec64b94c13e0" + }, + { + "dataPath": "params_shard_325.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "de07b013b36564474ff1b20fb42d3cd0" + }, + { + "dataPath": "params_shard_326.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "72e4c2da96f7a966c6d245a41bddbd7f" + }, + { + "dataPath": "params_shard_327.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e8bfe659d1be558100306724f09e1f60" + }, + { + "dataPath": "params_shard_328.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0ba7a8135afa19b0e96dafe78bd3145c" + }, + { + "dataPath": "params_shard_329.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "3311af9e82aa635be2dfbd1c67c37113" + }, + { + "dataPath": "params_shard_330.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "17335e7c2de591d63fc44f6350ade9f9" + }, + { + "dataPath": "params_shard_331.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c756d02ed68848af847c6e67913cf914" + }, + { + "dataPath": "params_shard_332.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "9e384fc6174f245315f37481e035816f" + }, + { + "dataPath": "params_shard_333.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "73387015664f97bb52610a0ae1376f1e" + }, + { + "dataPath": "params_shard_334.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1aba3e3c8ddc27dfcf5bc47710968c49" + }, + { + "dataPath": "params_shard_335.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "1cb3b7f3489befa2dfb7decbda8ecd9b" + }, + { + "dataPath": "params_shard_336.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "7ac48986fe87010eb3ea131576f4083d" + }, + { + "dataPath": "params_shard_337.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "6a91c4a7724a8af12f85149ca07f74c3" + }, + { + "dataPath": "params_shard_338.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "539bcadf0ad983b240f28f457a7c0f30" + }, + { + "dataPath": "params_shard_339.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8004bdbada0ceb809a481d5ef402d385" + }, + { + "dataPath": "params_shard_340.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "399f308541ee0569c71898209e419855" + }, + { + "dataPath": "params_shard_341.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "1a3498eaeaefbdb460c6d1fd52bf43a1" + }, + { + "dataPath": "params_shard_342.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "66805921f3a88fffb5a652407b7143e1" + }, + { + "dataPath": "params_shard_343.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d0de88e79b9251ffb8e7a8c1dde57321" + }, + { + "dataPath": "params_shard_344.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "14fdfc9a0b0f28ca5b99e236246c9ece" + }, + { + "dataPath": "params_shard_345.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "384359d435b747f1ad86a0f376437f05" + }, + { + "dataPath": "params_shard_346.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "91d71bf3f95da41eb7261d92be3a44ba" + }, + { + "dataPath": "params_shard_347.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "e4703de8018d889569eda0f9a847eb7b" + }, + { + "dataPath": "params_shard_348.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ad051dcc8c31372d5f6250ba497c7521" + }, + { + "dataPath": "params_shard_349.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "9d8b5f43b5984d75b7b4ded1fbfbbd7d" + }, + { + "dataPath": "params_shard_350.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "b8756756e044a8a736c195f113256515" + }, + { + "dataPath": "params_shard_351.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "3204dbf46cff779836596dc0005afedc" + }, + { + "dataPath": "params_shard_352.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ac3b1a428631a3f4132664ad844dbb2a" + }, + { + "dataPath": "params_shard_353.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "1fc4ba9906daecb92c1fc2a5902ee11d" + }, + { + "dataPath": "params_shard_354.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "c3a12d0a5350670fffcb4155fae0ac8f" + }, + { + "dataPath": "params_shard_355.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b3e967833f6769d590464528425589b0" + }, + { + "dataPath": "params_shard_356.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f994830049ba969c561df8d33953c228" + }, + { + "dataPath": "params_shard_357.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "603764e0763f1d017e586e4bee82c0bc" + }, + { + "dataPath": "params_shard_358.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "3585a253cf9542efbfb3319f5c186658" + }, + { + "dataPath": "params_shard_359.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "dae2949a751b58d0c1e0bda6bcffbc70" + }, + { + "dataPath": "params_shard_360.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "7a283ad2697d20842de014195222774d" + }, + { + "dataPath": "params_shard_361.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "378d5756024cf10ab1811f3fd1d94940" + }, + { + "dataPath": "params_shard_362.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a52599bdb961e9ec0d86476609b24492" + }, + { + "dataPath": "params_shard_363.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "500869f6882003d889855528e8221e86" + }, + { + "dataPath": "params_shard_364.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c8e0f0255b42f9832cf1e495e75468be" + }, + { + "dataPath": "params_shard_365.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2bc7b58624182fa4f5e98a35c06f96db" + }, + { + "dataPath": "params_shard_366.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "75684dfed5dc31906da5079007c2986d" + }, + { + "dataPath": "params_shard_367.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d1b97c20714f3de1efbcea36011bf360" + }, + { + "dataPath": "params_shard_368.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "860b5aceb17f1958e25eab128ba7505b" + }, + { + "dataPath": "params_shard_369.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3852f22941de0850145970ce589c92bc" + }, + { + "dataPath": "params_shard_370.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8cd93b79ebd850fe025c03f23b6c40d4" + }, + { + "dataPath": "params_shard_371.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "861e503b62efd764cafd5775aa0023b2" + }, + { + "dataPath": "params_shard_372.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "9e6ca3425d6928dc4013adcf80fe848d" + }, + { + "dataPath": "params_shard_373.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c051366cf7c12b459a2eec94c4ccd364" + }, + { + "dataPath": "params_shard_374.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ba9c29d8ccb40366b10638bc2e9b173c" + }, + { + "dataPath": "params_shard_375.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "2f6485a22443c08b4e6c9d54b0877cac" + }, + { + "dataPath": "params_shard_376.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2b9ac6ead69f9cbb34d84a01a47a3944" + }, + { + "dataPath": "params_shard_377.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f291cb32cd2ad0229c76f879321429a3" + }, + { + "dataPath": "params_shard_378.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.64.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "8a72483fdedb148cd73034aa08bb4b4f" + }, + { + "dataPath": "params_shard_379.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "1366cd66c642bd7f48db7de9ebabde67" + }, + { + "dataPath": "params_shard_380.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.64.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "898eddbbe20f982069438ec7f210c4ab" + }, + { + "dataPath": "params_shard_381.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.64.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f32acb7eeb015ab1b8698c6b37bc23e3" + }, + { + "dataPath": "params_shard_382.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.64.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "07c182260e47ccb4fa675cace2fb06ae" + }, + { + "dataPath": "params_shard_383.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "25e0598dedc7b596a71f389c7ef7a706" + }, + { + "dataPath": "params_shard_384.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.64.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.64.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.64.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.64.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.64.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.65.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "9e5e57b9dd7e5486605f686cc7adbc04" + }, + { + "dataPath": "params_shard_385.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "8102d113eef5a76617caf66b20fc0b5d" + }, + { + "dataPath": "params_shard_386.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.65.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "79c25c2e5848570e1280fbac1207d9fb" + }, + { + "dataPath": "params_shard_387.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.65.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8c2fd79c1a0daefcd1e1fcedef0d4dda" + }, + { + "dataPath": "params_shard_388.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.65.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "19f037871b43b9dbeefe0d882c5ce9a5" + }, + { + "dataPath": "params_shard_389.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "c23b1feb32573c54050bece49acd9948" + }, + { + "dataPath": "params_shard_390.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.65.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.65.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.65.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.65.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.65.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.66.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "0e8f43644a3752c38b0cff2170fdcd54" + }, + { + "dataPath": "params_shard_391.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "c173abb2975eb81dad474fdadc2dbfd5" + }, + { + "dataPath": "params_shard_392.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.66.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ee2b73d543b168f88461a67ea456b3e7" + }, + { + "dataPath": "params_shard_393.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.66.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "8d5a3ee7a52f446baf5e14137e290605" + }, + { + "dataPath": "params_shard_394.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.66.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "cfcfe4265f350a68b28331dbebd4a0dd" + }, + { + "dataPath": "params_shard_395.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "9e66778d86b0362c0fc14d9cc9974a2a" + }, + { + "dataPath": "params_shard_396.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.66.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.66.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.66.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.66.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.66.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.67.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "ddf24bfa76a5c960480da31d0db7db59" + }, + { + "dataPath": "params_shard_397.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "1821dab0bc646191f18298d81279fb8f" + }, + { + "dataPath": "params_shard_398.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.67.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0e67adb32b1379ab5ef248ce84fa26bc" + }, + { + "dataPath": "params_shard_399.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.67.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c61e983c17d98c47b09df75d7d0b89d8" + }, + { + "dataPath": "params_shard_400.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.67.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ade972a27359d4fa55b56f538a90b689" + }, + { + "dataPath": "params_shard_401.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.68.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ce35c889efb3635e82199b5c0b546036" + }, + { + "dataPath": "params_shard_402.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "758609781951e9f80e29d032f101f555" + }, + { + "dataPath": "params_shard_403.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.67.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.67.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.67.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.67.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.67.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.68.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.68.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "f2e0af3a02eefc5136c7b8bf871ba21c" + }, + { + "dataPath": "params_shard_404.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.68.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "12cb97b1183632cb3beedaab30a9b249" + }, + { + "dataPath": "params_shard_405.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "37048a5faa7b63d7d4bc8feff95fa4e9" + }, + { + "dataPath": "params_shard_406.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.68.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "9a0e10c36669025284f43157d8cc8cd2" + }, + { + "dataPath": "params_shard_407.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "6d65a9e28bc037b1ba85776558cda71a" + }, + { + "dataPath": "params_shard_408.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.68.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.68.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.68.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.68.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.69.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "c9f890bcd2340716d4df1061acc3970e" + }, + { + "dataPath": "params_shard_409.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "08a4cd3d5e03462e13ca49dde3bd12cb" + }, + { + "dataPath": "params_shard_410.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.69.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "7fac27774074cf07600c68646532c62e" + }, + { + "dataPath": "params_shard_411.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.69.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "642b35f308e8ef2f2555973014d2e8bc" + }, + { + "dataPath": "params_shard_412.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.69.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "54d3cd77cec331d82831aca4b55ed3a8" + }, + { + "dataPath": "params_shard_413.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e9711e73a346a3563696e4b547c4c690" + }, + { + "dataPath": "params_shard_414.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.70.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "f3815fdf4dfd3b7a2338e23deef3431c" + }, + { + "dataPath": "params_shard_415.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.70.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "85d565823923e6726a2ab7ce09e80b33" + }, + { + "dataPath": "params_shard_416.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "519448cea1e4f94bcd731ff0bb3f3a82" + }, + { + "dataPath": "params_shard_417.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.69.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.69.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.69.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.69.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.69.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.70.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.70.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "d3579b98fd56e09cae96f7ddfeae448d" + }, + { + "dataPath": "params_shard_418.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "69216bd81bdaf832fe5c553e8053ed00" + }, + { + "dataPath": "params_shard_419.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "31428ab3b76ab1b47334730878a6c1bd" + }, + { + "dataPath": "params_shard_420.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.70.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "b1c86fa8bf11a058b43c84e6bf454385" + }, + { + "dataPath": "params_shard_421.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "e281af21f49d7491e400d8c7441a1d5f" + }, + { + "dataPath": "params_shard_422.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "4f0e476ca223f7f7b2fd3d34d6667272" + }, + { + "dataPath": "params_shard_423.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6dffa03b67517c95728f170cb282af45" + }, + { + "dataPath": "params_shard_424.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a7440242c907a61d2b1e0b4140342ef6" + }, + { + "dataPath": "params_shard_425.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "56f5cc333803fdb0525657fcd964a5e5" + }, + { + "dataPath": "params_shard_426.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e781cff5fbfff24d8e17eb46c0e1cd58" + }, + { + "dataPath": "params_shard_427.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c6a8a2629099fe8f130caaf41edb3169" + }, + { + "dataPath": "params_shard_428.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "be93407f1b986767b57215e11e38a90d" + }, + { + "dataPath": "params_shard_429.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "0c4594ad123b96aa21f36f34a17b6c9f" + }, + { + "dataPath": "params_shard_430.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.70.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2d266fd49934dbb26bdb2fa61164f626" + }, + { + "dataPath": "params_shard_431.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "8f850db3533b29e3776bf2143c8f2927" + }, + { + "dataPath": "params_shard_432.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.70.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.70.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.70.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.71.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "feb8bf695421fbeea29b1bb2d83aeacd" + }, + { + "dataPath": "params_shard_433.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "b473aff6d743be92c3ae468947188d34" + }, + { + "dataPath": "params_shard_434.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.71.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "8f867f3374fb2aa629c2394c281f0437" + }, + { + "dataPath": "params_shard_435.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.71.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "511d82073473bc16262816d011782d78" + }, + { + "dataPath": "params_shard_436.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.71.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "7f3ea13087408fa1276248e68429348c" + }, + { + "dataPath": "params_shard_437.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "82302060fde6a0c1ecb6c90e0a17f2ed" + }, + { + "dataPath": "params_shard_438.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.72.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "7bf74af1247a5512a1796ce29a7f6cd5" + }, + { + "dataPath": "params_shard_439.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.72.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "365f4194972a7985b69121404f858468" + }, + { + "dataPath": "params_shard_440.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1c1b2426fac7da7353f31469015e257c" + }, + { + "dataPath": "params_shard_441.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.71.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.71.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.71.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.71.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.71.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.72.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.72.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "a3e55950eaf59b46267d242c4dcf2021" + }, + { + "dataPath": "params_shard_442.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.72.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "bd6cc29f07c6c547ad7e490be7308d20" + }, + { + "dataPath": "params_shard_443.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "7cd1683fe424bb788e6aa26250d1f74f" + }, + { + "dataPath": "params_shard_444.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.72.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.72.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.72.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.72.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.73.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "af50be7c5f704bfbc984ee5fc73e5761" + }, + { + "dataPath": "params_shard_445.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "d0d417fc42b0b60b98020eff921d25a8" + }, + { + "dataPath": "params_shard_446.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.73.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ca42b8a35e229e647e0e9ac173028ccc" + }, + { + "dataPath": "params_shard_447.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.73.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "cccd036fa4d0962f9bc421011421b063" + }, + { + "dataPath": "params_shard_448.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.73.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a535228bc09eb9a13d445c9db46ad1f3" + }, + { + "dataPath": "params_shard_449.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "2ee9410791d29943422ede710152a9d6" + }, + { + "dataPath": "params_shard_450.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.73.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.73.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.73.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.73.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.73.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.74.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "2a9226e5a3653324967fe85429ba4329" + }, + { + "dataPath": "params_shard_451.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "a665b0d3faaf7f27de9f410da91d2a36" + }, + { + "dataPath": "params_shard_452.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.74.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "e7000c1e6c2aa0aa20fd5e641dc0e9c7" + }, + { + "dataPath": "params_shard_453.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.74.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "f6317ed839a06e02e67fd2fc76ff676d" + }, + { + "dataPath": "params_shard_454.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.74.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "df6b8ac824ab692ce2da36c809e83f68" + }, + { + "dataPath": "params_shard_455.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "97046d872b3aadb51abe924866ccdf97" + }, + { + "dataPath": "params_shard_456.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.74.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.74.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.74.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.74.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.74.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.75.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "c01642b435f6779243df70d43d95ee36" + }, + { + "dataPath": "params_shard_457.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "179fb8fa72ddb6a2d8c7ae9ac74546cc" + }, + { + "dataPath": "params_shard_458.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.75.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "577509f422846b37322412f6ff0861dd" + }, + { + "dataPath": "params_shard_459.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.75.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "ef01adf4e428f2f4a4d565c9f6de6422" + }, + { + "dataPath": "params_shard_460.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.75.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "619fad262d2b8cbde84067031118042b" + }, + { + "dataPath": "params_shard_461.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "444e4aac5a215328391f507d5347f051" + }, + { + "dataPath": "params_shard_462.bin", + "format": "raw-shard", + "nbytes": 24629248, + "records": [ + { + "name": "model.layers.75.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.75.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.75.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.75.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.75.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.76.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 24612864 + } + ], + "md5sum": "fbac8ffc40d02cb0c3daece0ba1e2be0" + }, + { + "dataPath": "params_shard_463.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "71244c49fc240a3726c8875fdad997ff" + }, + { + "dataPath": "params_shard_464.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.76.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "0a2ac62f8a5bcbb79a836ac6a128dc05" + }, + { + "dataPath": "params_shard_465.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.76.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "27e04959714a57ba0da09bf993a7c45b" + }, + { + "dataPath": "params_shard_466.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.76.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1f40752bf6b367ee0b11715539e2cd7a" + }, + { + "dataPath": "params_shard_467.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.77.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "c7eeb9ede25da1f2c08eab1eb80f57c9" + }, + { + "dataPath": "params_shard_468.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6b17b6647ca7ea83d306dd422c259b07" + }, + { + "dataPath": "params_shard_469.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.76.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.76.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.76.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.76.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.76.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.77.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.77.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "3df8a294be75effc8e38cb087ee065b0" + }, + { + "dataPath": "params_shard_470.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.77.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "f1de750c63fd7fbf568e20567097b04f" + }, + { + "dataPath": "params_shard_471.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "40ced5f814bc16c779899c6d48ddc4b3" + }, + { + "dataPath": "params_shard_472.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.77.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "ae50bd9f83602fb110a22585f599726d" + }, + { + "dataPath": "params_shard_473.bin", + "format": "raw-shard", + "nbytes": 121110528, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_weight", + "shape": [ + 8192, + 3696 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 121110528, + "byteOffset": 0 + } + ], + "md5sum": "4b50ba6e95a20fa72ddceecead177e17" + }, + { + "dataPath": "params_shard_474.bin", + "format": "raw-shard", + "nbytes": 19382272, + "records": [ + { + "name": "model.layers.77.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + }, + { + "name": "model.layers.77.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 4194304 + }, + { + "name": "model.layers.77.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 4210688 + }, + { + "name": "model.layers.77.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19349504 + }, + { + "name": "model.layers.78.input_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 19365888 + } + ], + "md5sum": "9721dd0e90f269ed395b823ae2ef8d18" + }, + { + "dataPath": "params_shard_475.bin", + "format": "raw-shard", + "nbytes": 242221056, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_weight", + "shape": [ + 59136, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 242221056, + "byteOffset": 0 + } + ], + "md5sum": "fffc18f71faae5177910f84417cbd6ba" + }, + { + "dataPath": "params_shard_476.bin", + "format": "raw-shard", + "nbytes": 30277632, + "records": [ + { + "name": "model.layers.78.mlp.gate_up_proj.q_scale", + "shape": [ + 59136, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 30277632, + "byteOffset": 0 + } + ], + "md5sum": "01c70600cfa286beaa766b86fc954acb" + }, + { + "dataPath": "params_shard_477.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.78.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "6da949382437dde33268eeba4535a023" + }, + { + "dataPath": "params_shard_478.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.78.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "75f8942fce96641b953c1844eb736ba7" + }, + { + "dataPath": "params_shard_479.bin", + "format": "raw-shard", + "nbytes": 41943040, + "records": [ + { + "name": "model.layers.79.self_attn.c_attn.q_weight", + "shape": [ + 10240, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 41943040, + "byteOffset": 0 + } + ], + "md5sum": "3efd2b96df9b8c2162a8c481c29028cb" + }, + { + "dataPath": "params_shard_480.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_weight", + "shape": [ + 8192, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bb37b28b11f6f5d2cbff130bd2ccf285" + }, + { + "dataPath": "params_shard_481.bin", + "format": "raw-shard", + "nbytes": 29876224, + "records": [ + { + "name": "model.layers.78.mlp.down_proj.q_scale", + "shape": [ + 8192, + 924 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 15138816, + "byteOffset": 0 + }, + { + "name": "model.layers.78.post_attention_layernorm.weight", + "shape": [ + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 16384, + "byteOffset": 15138816 + }, + { + "name": "model.layers.78.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 15155200 + }, + { + "name": "model.layers.78.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 15175680 + }, + { + "name": "model.layers.78.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 20418560 + }, + { + "name": "model.layers.79.self_attn.c_attn.bias", + "shape": [ + 10240 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 20480, + "byteOffset": 24612864 + }, + { + "name": "model.layers.79.self_attn.c_attn.q_scale", + "shape": [ + 10240, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5242880, + "byteOffset": 24633344 + } + ], + "md5sum": "930256ddbc0037356c0be0f646c2f7f6" + }, + { + "dataPath": "params_shard_482.bin", + "format": "raw-shard", + "nbytes": 4194304, + "records": [ + { + "name": "model.layers.79.self_attn.o_proj.q_scale", + "shape": [ + 8192, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4194304, + "byteOffset": 0 + } + ], + "md5sum": "bcd19ca778b5296ea020b603cf772f7e" + } + ] +} \ No newline at end of file