diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,10103 @@ +{ + "metadata": { + "ParamSize": 709, + "ParamBytes": 18431289344.0, + "BitsPerParam": 3.2800260261877354 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "85eeee228b7acf1d3573d414cca6a4fa" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "dfe53916f79c91004f856defab88533f" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a90808a40fe3ed3c0d9a14b17d1898c2" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "28e3386c8080abc7f883f18899502951" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "f113a43b7a19a3131d97b0be7e26f57b" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "40ccdfbe7a5c19e32d5e104a2a4ba983" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "22fceacc6617d230e6f843a148b655fd" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 26583040, + "records": [ + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 0 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 10240 + }, + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8857600 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26552320 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26562560 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26572800 + } + ], + "md5sum": "636d1643c5ffd08c756f9c5e839e446c" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "52acbd1c142fc2815db5035f221fb2da" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "783f1a64645548527b3aad8d0afe2ad8" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "4d7efeda756e2b8a6606a5c17c5ed3b2" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "84168424aa0509cfa58723d6421c8a38" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3744ee7da8c201e1784f830958992aa1" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "bdc96ce69c763fabda1af3a2d5f8f06a" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "66674296da7b5325db4a4479104320f6" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "48b40918855192038bca9592949eff2e" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "05dc195d101c4303a0cfbe8221eb1119" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "700f031890fcb290c6eddb9767e19f50" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "cd8ebe07bc86bb65d50929714dbecacd" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "97f9c60f1c0b8e5ff46bb04428684a98" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "9e6b5d552594b3d34d6fcc61901e7183" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f35b2aa9926572a184a5498da2f26c65" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0bb0b07eda7a4deb31a6d8f0f6af96bd" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "5b9db5e5e08da4f537968ba2dca32d19" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "452135d5e1d360777fec4c8cf80beda2" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5d3d01b8d9d53c14459297ba6491bed1" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "6f643bf8b3700d6f4cfc503dbc73aa8e" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d10a0ce6346bc48699c8bf7e46c73a86" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 28215296, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 25907200 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 25921536 + } + ], + "md5sum": "79607a453cf36cc2d2ae64ddc966a6e7" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "82b851d3bc4843580512b9d605b90eec" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c9c7b95d25b4fa0c8610e51194b22879" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "ecfb53a2b74163222988f60730564fef" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b6f2c5bac294e41c812e8ee1f5d72f2e" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "26a1a14f830cef6c966734b857d65945" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b7231ada028cea01dbc3bdea79fcb0d7" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "36a67c4f92a1b123faf938f54c1d753c" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "20e8c3bcaa48e5279f4e0c81643a288b" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "484f68515afd2a8ed4d16c38a2ddd552" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "0bfeee1ed855b9cca07ccd7f206d9dcf" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "385b52a7dc908340003b2533bbb00d08" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "04790d1ddc9245ae4500f109823e7614" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "8fd2ce6f06f5d8db2a9cc8cede4086ce" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f5b3047735b3f77e473f933ba90e8354" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8147a0668271de2d7f2639b7bf9e9e44" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "9889cb559a97df0b7d92bd6edda1ba5c" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8d5654d5014a9119940dd3c1dd9a2e15" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "7931e8168faf6f9a16c30adb49c38fd3" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2c92a48e686c3803c5c5469446862c3f" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "456c2fa585e684bc801201f29dbb1250" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "ef4f405667fca151db355ad8511ff4bc" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c6409b2d9c564b0ef9c18135a3ee57a7" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7a80e3456dddf1724a452ac04000259b" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "20efebf5931d2a646624557ea6d44070" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "66ee978e4f744a69b986e7a384e22ce4" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "bf91e788b1e51294a567ee0bf8ff1de8" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 32495616, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23613440 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 23623680 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32471040 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32481280 + } + ], + "md5sum": "f45bab583b9c319edbea51303aac1408" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fefcdccb814f8a6da64ba249949c26de" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b1388d3dee93947029bb5fd016a12dee" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "0a451786ed91c8f545339c5e0ee59163" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "79592e47cfd115b14360dc7c6c253b41" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 28215296, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 25907200 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 25921536 + } + ], + "md5sum": "d7a55f1e2e7427c8a7ddcebdab354e53" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0c4bb8eeb5db94cdfc479051e64cc543" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ef64a97791cd8d8270f150fe179e5943" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "50bc74bc4f9abd8a6baff46fefa2a891" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "43d291f90d056acf085e50ef8d1b060a" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "d41f3bf8fc5c27ef8baeb1c18225189c" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f915ebb44b8d2e5774ada7c19332b6a4" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "92a290b59585a24cf61d24fc3442fcc3" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "1a7637fdbc7665f21b252c8ca1d134c4" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "645451a6533122d0a5ed88f55a4ab375" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "230a1292dd846bcbb9385a7e75408ee0" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3d69055690987f100c2a6b138d1ad630" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c9ef1c0d2b9155f5c8b47fefe7627ebe" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "ed46cfc5fc0bee9b456f0a1651d91ba9" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "29c8284a5664d79490aa01f1a997444b" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "05367f15633879b94d4f9d18885f6089" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "5fd49d39ae58132d15e5bf08778ddbb8" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1536b285a863d2383e7c17285dca4912" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a9e68a7704444c42585fbe330e392acf" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "fe35cb7c76ff5f04e465f6a0712a8b41" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3ef91a00d81bf95bb9536989cdd18a56" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "407702f552cd1a08e37c5e05a5eb02bd" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "50500ca1e1a84e72508669f940f0059f" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "dbcbd85f0334606e02c31bfe36a509b6" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "b733756763aade2d0fd6eaa7dfc874e0" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d69b81cd89c5cc73c3e18aa1c3191cda" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ea5a002a35f0bfcd0daed351e96d1636" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "c8404f2aaa61ddfe14bed73167c4786b" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b168102bf74a8f92a720520e4141e3fe" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "24a1bb0e1c08c5661162e45e1152bcf4" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "093389255b5b2478bdab4e79c4798d8b" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "30d3ffcbecfe0ed099d72d5fbb9d569a" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "c665abca93b69686af4b093938f95d87" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "fcc7310d8723760bee0aba6512d77653" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "440c1aa1847a33415cde46727aa7a29c" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ff6143df6b3577854a032aca19ee13a1" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ec405a2f35120db60f2828bf44295f35" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "44533c2d66c5b56b2b70d65b3e77bf16" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8bbd9ded38e23907b6dd44ae2794e19b" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8e9d7448f76b69827bf5e535be74489c" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "ef1b9828c8d9ddc18a8b1cbae9f00af7" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "46d8db0a1f7339370949d470ac3ae027" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8a8c6fe96908be697533eb597b21a892" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "d61a5847373bf85aeecfb353ab25cc7d" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c888de72f0ee54b8b2676bc61fd03938" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2ea0e88b40d1162eb6217d2cd4ea27a8" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "3680b0b5a5d1c899bbc45d35e36202cb" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3e85fd828e1dffb045f9376320a21e9a" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "2424725e51c38699badaa04c37f68f8d" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "55864e3c2228f1479cc34434a00cd9cb" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0bf8dd1019650a45d734e1cffb882b28" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "6a44ec6dd8d585a0f5a4eca63d87e868" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "340fe96ce2fdab1c557ebae93453abed" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "bbcff15c2e7d67c88b3b481a0e346a06" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "302673dc86afc6021293f3708967994f" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8ac421d4126e619f6abc96f37516a474" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "941ea30b2ad24f70028d85db9b5e7501" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "6c05a907ff8f1513d430de73f0b43154" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "085cb3b87dbb134841c0e973ebbba8ea" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "cb5ba6d655f0857acd083b27bc61253a" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4b6a09dff649fe19fb55285152e45145" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "1546ef219b395101199bab076fb21878" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3f698c87e688258da14da6523c6e1d5c" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f77b1adbbe045c95eee7efb63cf12785" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "345c535ca37913c72750967416de3da4" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "35a773d6a5d6a8760b9df27645effbee" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "040100c6b53e63dd38f45377b6cc7020" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "ee9587811c73551e28e41f71097cedf6" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4ee0ed533993b9a998c38adfcd285483" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d992bab848969ca6c14501635905bca7" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "198c3af06aab33d72cdbac8fac1d1727" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "92463a89855d23ab2272d12e36084a6a" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "01d2bfa6a62a0efdbc50ee10ef9db9d2" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "846bd82df5cd8d7f2df5cfb1ba0f1807" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7916a8d920dd7d82eb95c6926cc2433d" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "09937f56fb49648c50213ae254837f52" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "25e86594abf5b73e77dd7d60748f38c2" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "ba6ffb3169ded873674b8c9f72c6ba4d" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d062fd2416632ba0ca47bf0754a9687a" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "93861e30f2f154bfbf25949f98449a39" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "edca9c989f62df96325dac33d2eecde9" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "baed023ad3a41b837368700e18ce56fa" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "b2bc910035146b5c6053b46e9c952324" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1bd103c82c7161b088b91ae61b8faceb" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "211f9c8aec9e21282704b5c338ba34fe" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "021c5d979694959396b4b938be6d09bf" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "416a38e4c55c6d911c3671f3b90218e8" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c6cec2dc3608d689acd2c60be3d7321a" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "4d55def659bd1deaa89e67ce8fe480fc" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "026b928754b6f281c966a4afb24c5ec2" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "21949021bde1394eed48e792a68c9bc9" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "06ea052a98f16b863c6a3a22680ce176" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "98644b298b7b2db0f82a18fe2120c8ae" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "694e57be31cc3c8421e3a69322a25cf6" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f530217ee5412f7e7c62cd5e1f1c0b5c" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0b9c76ebe23087031b6c72fd22af15ad" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "03ff9cf405fa287dd4ed037bb80d65bb" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0d4b6cd3fd8f65afcdb243e45177edb6" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "48309a79083cfacd6130ea4957679f36" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "156f01c777032928fa825aeb600a4e36" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b4591a9d6b121278ee3accd6ca1a8e3f" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "ab12c81681a4bb1963990d3bc85f57c7" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c356a01c6864fae29f1fd012f1c4f817" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b090a38249a8088db3291c7fbd99b248" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "4eff0f365249f5ae6ed486c1af6eb28b" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "59d21afa968bbab76f47e0bf27b4e129" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "798177e031be9bd9478e805b2a933788" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 33110016, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 14745600 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 14759936 + } + ], + "md5sum": "01f0907b69a65d2c79de8a1183d96375" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "04f6e41018990e5052ee61518578413b" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b213a0551376618cb782744e57437759" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "1f7ba37c305ce969087a474fe7ea9e43" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "127ee7635a72edef08396cf030294e80" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "20d065e36c0919de51859a7195b4cf17" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "f17a2be14927274f3b54554028e98cbd" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "bd3666d592e9012ba122902461010202" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e53517de1ea9cf79705cbbd29ae72807" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "e18c8725e45895c512e62d1fd1313891" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "407c7f98dc7eef57b8077dcc5fe05957" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "1327775a06a7fb81cbe43b08cdfe5d5c" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3ea9b55bc162ddab9f350d6237fabe71" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "afbc3b0e4721d1fbbe8e87104a4aa729" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "580d3d7757a1d8b154d1e1ad5e5ab8cc" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "db64eb3a944831be487fa50c3fd20679" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "412921d0d93e46e5246b090360793a69" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "eed940d2e829b0418ce444564dd582bb" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3eb9dfc0b1475f21cda3152e414a385c" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "44f5a92ff129b6aded3ea276c79cc4f4" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "37288aadeebd00308efd7e36b59c11e4" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "71e2a1444447caa599d472f1e1bebdd5" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "605200a32fd0377389d4e1d057e54e3c" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3de6b39c115544abb723f51e131e6055" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "8f06f9e738d561b21abebb000ee6eb67" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "41d10770185340f9a5524f7f00efcb1d" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d04809513be9424ee5f3f81a24c0f562" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "c6db86a469fcc7251a0683d742a11da7" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "826f51f4bedcebc145dc0d20c0167bb5" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "034b987573e4efa5ce460dda9368cc80" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "2b947c28c0781045c3c0ad98a9d812e3" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "be5bebad52a72d537777cd47c4124515" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5fd679c0c1242831eaf1e1f8a4997652" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "8371ba26e6febe4e55cda2f57c3c552c" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "db0fc56f86760013f77fdab203d7d72b" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "c1e07d69c511dfe62f6dea877f165581" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c26f9512cb7f47921e0e250a46422949" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a7c9330d96981535644f861385f2f228" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "e6507cc50b49f17cb0712aa6ade6b3ae" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0c902f40cfbd73dc410376dcd9868ddd" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "6c83cb4b44ae913f963c0309dc9dcf24" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b75a635aba39f4dceba49c18b4dd811c" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c4306b0ad5a566ac9562bf7455d8e4fe" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b6032401d675be4f5bb041a5b3cd0daf" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ce6c966f0545af437429718f8b64cdcb" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "81aaebd77d6fcbd28cacb229b664d92e" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6d62429ad7fb15be83fce0c0967c07e2" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "9863264e0d8c9cbdbad4daed0b68bdff" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "dd26bee0b2ee85bb9929bc70b588fb11" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9f611e6343ab86efb98113753f1642f3" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f456eab162db784502b19145563b7998" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "dcd96dbe92f948e1bfeb4d7848e1266b" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ba0352a2292e4173491e51bbae330d5a" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8195f9a68e664e0b3f8fadcd08d63e11" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "d2ed40bf7c7954aa2516321d22838ada" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b9d6d7b25fc49f1eeea498ea07c9f1e4" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2b8273a17a28c7647c278c773c5a2699" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "a4ef78b12120b96efce2c513e6073639" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "756ac7f17bf591e948749cc5fdad87cf" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "4cc8646614814768e8914834153bfd77" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "13f26544152f1c34b32e3f8c461c0df8" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ebc43e483847d6ee34773d11947c517a" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "321afed44dad91a6be3ceec4fb3359d3" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "606247ee979a0574ae9e60971e422af1" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "9f34784589280b192f7698b13e660983" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fc584e097d63536471ec07864c1225b9" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "079c7af479096d539e7b2fc08152e309" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "dd45bdf005227efd0191a8092986009b" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "21c5022db8bb4019840560c4857d42b3" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "317cd4539733db9e070b60ece0b6e1b2" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "37dd0e460e7108955b223f1e71db6b77" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "9658b781bed78fd6a1c1af21acc53525" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "12e88b2a8926d189c2c865a73e0ad1bd" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c59e75b029d2371ef2aee2ce05bf8f0a" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cf989ffb4d8fcc9250d20611d262de1b" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "b189e6770f039cc542913359de2987c0" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5c8788fb74acec05330e244d5e7a43f4" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4c8b54da4ae49bfb2fa034f1fb60b3ba" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "cc84d50dcd7c6c57064ec4670a90e717" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f9ee7aae16e1c0c6937a1d99e4572b15" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9b554543a117ceb28f4cde2b9206fe5c" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "da490a0dcfdad0df800b09aaf479ebdf" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "372d3f89e7e33cf70f958743340f89ae" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "973169553682bdf9ee3f2934081e7c2f" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f1f4ea06779f794902636f940fce4d18" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0abce49c740e60c7e57d308ff93887ec" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d91d9d25d01d40534b69252ae937ae62" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a791493d37987bd37aa9a0aac3d5a3db" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "cc615ac6d42685b8e67ebcc8cbcfb760" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "226c0952d0ba3af1022f96e3f7142c7e" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c5cde0220354d6cc7585be5885b566fc" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "c825e827b441718203fafa790c1f38d3" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f5e951a2eac9a386ca565fea8fe977ec" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "5169c748f32b91a5af0713ea6463f906" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "16fdc07c74ffb20a42e5e85564c34d8a" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3d2e0336739f08ddbb31055a2b46150b" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "589951c3a7cab670fb07818d4c3ad58a" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b4f4c840bef5279104f06d64c449cd58" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "70ff3a5d8f67688e43ece99c4762936d" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "e73d16c54599de8adc19e7843776191a" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "85e7c46beeeee9557cc119770be89285" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b4ac1f0bf728c6f4139fdf546402aba4" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "37ab07701a3779c5c9ea7ddd1dcfa7af" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3a72bf621d32c9750f4bc18fe86bcc1b" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0ad6c110eb17e10f236e2c2bb4d424f3" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "a787ec1a4a657249861070f635fbe942" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "e3a0edb3d6d72abb384b608fb0051df8" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "040cda2c007fbd175435bd4321eed5b2" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2e65f508490e5d6d0813e6e3143c7123" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f4d31f4d710e8a53d5134cd4f14b2ef0" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "61b549108552ec735beebd8038f09457" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0aa4a6d53530d2e86713b507b2fd8ab7" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "dfb96902de296f8908064e76fdda82bf" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d1135638981f95ca80d9584073a1380d" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a5787499087d639928ce44207b7d46ec" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "48464a8eba3331f5074a667d9ed08f4f" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5aeb35f9d93c85d401cd9ec3b69f1cea" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "507d68326febebf9ac15cdd83e9cdb6a" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3ae35714e3ba586740b99ca57f52b766" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "807d2bf041134199a2ff6be7da39f0f6" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "dc6eae71f505301aacb6fef2075ca821" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b63ba91e8391691d72a507450ac7a880" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "99d11dca6ac45e18a8a9db47c5450b66" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "88d257879d91136139a9618f18f7bced" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "32d530daa9b5ae0173f78045077902bb" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "80849a7065311fa72aeff65379a17d79" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "2ef89df0a695e4b78c50d57cd44f4e1e" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cd743d35ea2afeb28f3443a677d2e061" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "74b0168e9af68ebba083f09686dd0f4d" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b5b0e3055c38cd784869613cff7637ea" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a98604dbcaa2e1034cc86496ce016fda" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "d42f8f4ab6232929ae90b958f4fa25f6" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "362064a5d71a035f2114f11dd95f3db0" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "166dcc956f5582adf31fb00d48fd4de7" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "e73eea9b4258f788f24403eb3baeb9f2" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "79c6d60a8376b091d5a3581cc8dc2adf" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "f092a176420452cb12f86d306190a3f9" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "34bf9fc03eea17df4c318fd52ac9a4ca" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1c2885467e1e33ed0fbd9dbdfe14daf2" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "a38c36e077eb1318f9842059c76878fc" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "befe7519130998f9ffdb34a386c487bc" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "dd30a16d3458286c3b66494b3def1734" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 33110016, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 14745600 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 14759936 + } + ], + "md5sum": "3dac58ee2b1ae2acfe865ccc615342dd" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 17039360, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + } + ], + "md5sum": "abfdfd40d2173aea2474cb20f863f116" + } + ] +} \ No newline at end of file