airoboros-l2-7b-3.0-w3a16g40sym / ndarray-cache.json
numen-tech's picture
Add weights
7778704
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 3048549376.0,
"BitsPerParam": 2.5538732071354877
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 52736000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52736000,
"byteOffset": 0
}
],
"md5sum": "7f67cb8c6e5505c50ee5c8e1970a09ef"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "b1fdf1cf62f1d2e47081d4aa066d4a61"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "fcf3a5fb21c9ecacfea204f40b316621"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 31500800,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6592000,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6592000
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 6600192
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11135488
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 11143680
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 11151872
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 29239808
}
],
"md5sum": "2b0c03123e554936c92a8cf67f6997be"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "7cc9313ab5a9c4de5be3acf2d5aca401"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "006055412f9c85c4e9987f49960cdbd2"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "71791370902ffeffc4008daac3286da8"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "92d42491cab29547e76bdcd56c3f2b26"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "5c8c8dbfb8f8e65ec45dca1ea002935e"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "700cc613612daa123563c4043956f996"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "3e954e202c23d3e78e81c833b4dde922"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "e158b29ab770a8444d75888e44fa9557"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "580f79470116fb050e2cc40db2e1d3d1"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "115f8bbee9577fa147c999c52a28790f"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "7d81b8f15fc193c11b88795c5713334e"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "2f0d7cf3803c0a92410ce72db71dedcc"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "7d823963e3868f4b5f6990c6fd11741d"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "335419cc588b2367c41ecdfca33ab78f"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "562bc79851a408c4ee973418ca980f77"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "292ef0fdf9bcb0a65faa07439d63605d"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "08667ce83e35e8378dcfdb3793808040"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "9d48a18a65dec911b63af99666aa5178"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "ce9c3905bfd5724b422a34cbf7b73efe"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "9f2674ff4f5f95d89146ba9e2d742db7"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "1071add4d623739bb71058e48527ad04"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "a815120a564e4cb20d1ef765a6a5ac81"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "7317a4b2f3e5c44fa33ee96f349a6afa"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "6d066bfa24437c68365bcc5cf34bc4d9"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "4082f2e536ace9ce9d34be40c3b73068"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 52736000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 52736000,
"byteOffset": 0
}
],
"md5sum": "95f3558b32a5d7aef7d33f4755b1cd04"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 32290304,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6592000,
"byteOffset": 7602176
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 14194176
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 14202368
}
],
"md5sum": "86834f48c8e88499de3f0d74ba3dfc6d"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "37d20af406cdab61d449bb2e4aecb8bb"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 29586432,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 2260992
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 6796288
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 6804480
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 27055104
}
],
"md5sum": "b9e281d8ea72ebc6f05294de99a299aa"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "ba6dcaee7d1e957b29f145a71ec8aadb"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "181bf34e268bc2464bf5144e8b876a66"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "ec4b51058311dfa0b54f087d14bfc164"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "4d7e3057e30a978cf3cd4d6794c415f8"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "fb1f21157ac043fd9e72eee134078542"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "db3a0d0acc25acbb12a96caedaf83a0f"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 18087936,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 0
}
],
"md5sum": "5f821d2be3d18039c8d54192eca45537"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "c5a84aad16f4bda812e9bf1a6511941b"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 32645120,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 7593984
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 27844608
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 30375936
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 30384128
}
],
"md5sum": "9ad04c6d7b23b84c6da9c52c98aae625"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "37bd0b9c2d6628bb1985b4ddd9000847"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "e1260d6df2c16b20156e08dd68887e77"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "d6b35e885f43d59f2b0c9b8b2c2d3ad8"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "7d4d2e3ad8cc2cc7933e7f4cb415bc63"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "efb2f4caf5045106641fcc446a6f3d8f"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "861cca4ea264d3cf99e5d91849918bcf"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "d9efdfbc9f0b85b97fe8558aa36f7cfb"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "b062eab64137f8d8226042898dd8c79e"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "a56f35c0f1c8f442df75712429a34279"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "24d466a9777fbaed22fd76609702f5ff"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "9e8be5e9a64a07a26c0894ee50ed7a81"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "d7e32d815061fa7f1b7d2b7bc60040d5"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "a9800661d63deb0eade5568382fc59e8"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "aba1d5a2a4f2705d463b123c0ae0a47b"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "223d105d59b48d99c4aec9af6b035bd1"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "87b81ed623c47ba1f097e2937fb4c5f7"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "a517eb04bd33442ad87032c69f65d16a"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "448175d0a5ef0c1cc348001ff2b91583"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "ada764d26b22c88cd2f3d7700c9054d9"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "37c166684a4bb901e88713357146cea5"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "db6904af780776a12a1689a6e04522b2"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "2250e259c481aff65d3bd111ea357242"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "02a76d2abb10e41fe652d94254f7b4f1"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "d22a3c07b839aea9e8bbd2bb293e8c0c"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "75cc8b0e12855ee1e0a893cfb63f10c7"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 4543488
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 11293696
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12137472
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 12145664
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 30233600
}
],
"md5sum": "83bf6a1c58c7a5833fed9d1596c30238"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "9feee3ea088590eddb23d1087196e2f5"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "dfb46741d6935083e31a53fbb3ab5dbd"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "035aadc265c894e2eecd420964efcdcc"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "60cdcff99209c52c1ecbc0787020f338"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "d771721dbbed49a3e35fcf72e51238a4"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "2b9aa24dde37c8fc7e6f564723e2f9b2"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "5bc7cb1ce49af013c51c1239e3a9f4c9"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "2b9ee4cd1dfbeec260c904043c43f63a"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "367ffe3b027415311194de0650f979ce"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "d7297071db694cbaa0d67a9dc6f674e4"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "db1b360554f8dc2af73506a285a67da4"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "aa43ac8a9ae4bf7e298b85a239763258"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "d56b497f4fb640bb650dc402834d1d6e"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "9772dbed16ae9bfa9137fde1a089e96c"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "5d51fa0f8f85b0b60b2ce56033a391ed"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "7cd3b362ee9fa9273f2a7838d2f1cf75"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "f77a11d1917c2be5c6ce2f3f41e8260b"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "a78e69995e9516c3c1ac6d33c098de61"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "0e7b4ee4854039dde8072cadee3ede75"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "fc293b47f270f590db9d64d7387be2b6"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "52962b364ac517716157337e351e72c6"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "e86ce7ab92b12c6c7a5b0c804895126b"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "831b1395d2fb9be8b06918ff6449a5af"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 30482432,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 10125312
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10133504
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28221440
}
],
"md5sum": "d2ce979022a5e600b9d135e7e1db82c9"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 27325440,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 0
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 4535296
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 4543488
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 24794112
}
],
"md5sum": "5442fe4f578202a66a5657d7ce6826e5"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 36282368,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
22016,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 36282368,
"byteOffset": 0
}
],
"md5sum": "52bd3a883285026f50ee18dbc98a4845"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "5a6fb54ee7e5e4f006fc6dfd74a7aa85"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 32494592,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 7593984
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 7602176
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 25690112
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
22016,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4535296,
"byteOffset": 27951104
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32486400
}
],
"md5sum": "d01558f6d7176cb33c1876d1a15b7bf7"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
12288,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 20250624,
"byteOffset": 0
}
],
"md5sum": "26fe36ea755a154224f104e519a6fac7"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 33005568,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 2531328
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 9281536
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1104
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18087936,
"byteOffset": 10125312
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
276
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2260992,
"byteOffset": 28213248
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
12288,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2531328,
"byteOffset": 30474240
}
],
"md5sum": "e134b4b74571f3871ac7944a5a32e93d"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 7593984,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
412
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6750208,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
103
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 843776,
"byteOffset": 6750208
}
],
"md5sum": "18726bb1bca7c943f4967bcc38dde874"
}
]
}