{ "metadata": { "ParamSize": 325, "ParamBytes": 3048549376.0, "BitsPerParam": 2.5538732071354877 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 52736000, "records": [ { "name": "lm_head.q_weight", "shape": [ 32000, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52736000, "byteOffset": 0 } ], "md5sum": "7f67cb8c6e5505c50ee5c8e1970a09ef" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "b1fdf1cf62f1d2e47081d4aa066d4a61" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "fcf3a5fb21c9ecacfea204f40b316621" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31500800, "records": [ { "name": "lm_head.q_scale", "shape": [ 32000, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6592000, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6592000 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 6600192 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11135488 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11143680 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 11151872 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 29239808 } ], "md5sum": "2b0c03123e554936c92a8cf67f6997be" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "7cc9313ab5a9c4de5be3acf2d5aca401" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "006055412f9c85c4e9987f49960cdbd2" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "71791370902ffeffc4008daac3286da8" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "92d42491cab29547e76bdcd56c3f2b26" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "5c8c8dbfb8f8e65ec45dca1ea002935e" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "700cc613612daa123563c4043956f996" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "3e954e202c23d3e78e81c833b4dde922" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "e158b29ab770a8444d75888e44fa9557" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "580f79470116fb050e2cc40db2e1d3d1" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "115f8bbee9577fa147c999c52a28790f" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "7d81b8f15fc193c11b88795c5713334e" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "2f0d7cf3803c0a92410ce72db71dedcc" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "7d823963e3868f4b5f6990c6fd11741d" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "335419cc588b2367c41ecdfca33ab78f" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "562bc79851a408c4ee973418ca980f77" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "292ef0fdf9bcb0a65faa07439d63605d" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "08667ce83e35e8378dcfdb3793808040" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "9d48a18a65dec911b63af99666aa5178" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "ce9c3905bfd5724b422a34cbf7b73efe" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "9f2674ff4f5f95d89146ba9e2d742db7" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "1071add4d623739bb71058e48527ad04" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "a815120a564e4cb20d1ef765a6a5ac81" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "7317a4b2f3e5c44fa33ee96f349a6afa" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "6d066bfa24437c68365bcc5cf34bc4d9" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "4082f2e536ace9ce9d34be40c3b73068" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 52736000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52736000, "byteOffset": 0 } ], "md5sum": "95f3558b32a5d7aef7d33f4755b1cd04" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 32290304, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6592000, "byteOffset": 7602176 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14194176 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 14202368 } ], "md5sum": "86834f48c8e88499de3f0d74ba3dfc6d" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "37d20af406cdab61d449bb2e4aecb8bb" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 29586432, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 2260992 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6796288 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 6804480 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27055104 } ], "md5sum": "b9e281d8ea72ebc6f05294de99a299aa" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "ba6dcaee7d1e957b29f145a71ec8aadb" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "181bf34e268bc2464bf5144e8b876a66" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "ec4b51058311dfa0b54f087d14bfc164" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "4d7e3057e30a978cf3cd4d6794c415f8" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "fb1f21157ac043fd9e72eee134078542" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "db3a0d0acc25acbb12a96caedaf83a0f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 18087936, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 0 } ], "md5sum": "5f821d2be3d18039c8d54192eca45537" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "c5a84aad16f4bda812e9bf1a6511941b" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 32645120, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 7593984 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27844608 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30375936 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 30384128 } ], "md5sum": "9ad04c6d7b23b84c6da9c52c98aae625" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "37bd0b9c2d6628bb1985b4ddd9000847" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "e1260d6df2c16b20156e08dd68887e77" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "d6b35e885f43d59f2b0c9b8b2c2d3ad8" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "7d4d2e3ad8cc2cc7933e7f4cb415bc63" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "efb2f4caf5045106641fcc446a6f3d8f" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "861cca4ea264d3cf99e5d91849918bcf" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "d9efdfbc9f0b85b97fe8558aa36f7cfb" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "b062eab64137f8d8226042898dd8c79e" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "a56f35c0f1c8f442df75712429a34279" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "24d466a9777fbaed22fd76609702f5ff" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "9e8be5e9a64a07a26c0894ee50ed7a81" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "d7e32d815061fa7f1b7d2b7bc60040d5" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "a9800661d63deb0eade5568382fc59e8" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "aba1d5a2a4f2705d463b123c0ae0a47b" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "223d105d59b48d99c4aec9af6b035bd1" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "87b81ed623c47ba1f097e2937fb4c5f7" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "a517eb04bd33442ad87032c69f65d16a" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "448175d0a5ef0c1cc348001ff2b91583" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "ada764d26b22c88cd2f3d7700c9054d9" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "37c166684a4bb901e88713357146cea5" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "db6904af780776a12a1689a6e04522b2" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "2250e259c481aff65d3bd111ea357242" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "02a76d2abb10e41fe652d94254f7b4f1" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "d22a3c07b839aea9e8bbd2bb293e8c0c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "75cc8b0e12855ee1e0a893cfb63f10c7" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 4543488 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 11293696 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12137472 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 12145664 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 30233600 } ], "md5sum": "83bf6a1c58c7a5833fed9d1596c30238" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "9feee3ea088590eddb23d1087196e2f5" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "dfb46741d6935083e31a53fbb3ab5dbd" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "035aadc265c894e2eecd420964efcdcc" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "60cdcff99209c52c1ecbc0787020f338" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "d771721dbbed49a3e35fcf72e51238a4" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "2b9aa24dde37c8fc7e6f564723e2f9b2" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "5bc7cb1ce49af013c51c1239e3a9f4c9" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "2b9ee4cd1dfbeec260c904043c43f63a" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "367ffe3b027415311194de0650f979ce" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "d7297071db694cbaa0d67a9dc6f674e4" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "db1b360554f8dc2af73506a285a67da4" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "aa43ac8a9ae4bf7e298b85a239763258" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "d56b497f4fb640bb650dc402834d1d6e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "9772dbed16ae9bfa9137fde1a089e96c" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "5d51fa0f8f85b0b60b2ce56033a391ed" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "7cd3b362ee9fa9273f2a7838d2f1cf75" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "f77a11d1917c2be5c6ce2f3f41e8260b" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "a78e69995e9516c3c1ac6d33c098de61" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "0e7b4ee4854039dde8072cadee3ede75" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "fc293b47f270f590db9d64d7387be2b6" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "52962b364ac517716157337e351e72c6" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "e86ce7ab92b12c6c7a5b0c804895126b" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "831b1395d2fb9be8b06918ff6449a5af" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "d2ce979022a5e600b9d135e7e1db82c9" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "5442fe4f578202a66a5657d7ce6826e5" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 22016, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "52bd3a883285026f50ee18dbc98a4845" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "5a6fb54ee7e5e4f006fc6dfd74a7aa85" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 22016, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "d01558f6d7176cb33c1876d1a15b7bf7" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 12288, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "26fe36ea755a154224f104e519a6fac7" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 33005568, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10125312 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 276 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28213248 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 12288, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 30474240 } ], "md5sum": "e134b4b74571f3871ac7944a5a32e93d" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 7593984, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 } ], "md5sum": "18726bb1bca7c943f4967bcc38dde874" } ] }