numen-tech's picture
Add weights
31d5434
raw
history blame contribute delete
No virus
139 kB
{
"metadata": {
"ParamSize": 324,
"ParamBytes": 2116752384.0,
"BitsPerParam": 4.367023615335193
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 49250304,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32064,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 49250304,
"byteOffset": 0
}
],
"md5sum": "eb6c6a847fbfd1162066cadde20450ea"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9dcd83ba6cc2da4602b028d5ff8681bf"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33168384,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32064,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1539072,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 1539072
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 1545216
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14128128
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14521344
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14527488
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14533632
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27116544
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27509760
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28296192
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28302336
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33020928
}
],
"md5sum": "bfae5f7a344c1aad102bce5ef63e9500"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9c3ab06d477d88573262b9a70b7b6b7b"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "03f4024b17239efa016dacc77313c2c9"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d1c7d8bbbb6803fe0c39741849c3851f"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "4ff2be1bbcf73d44290b87c9f16323a4"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "250d94a745dfab54f6f605dcd4a8d405"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "b9e6e8a8b853dd03ad2ded505d50d3e2"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d32218f3d992b6ef85b5de894ce59cb4"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "3c123792dc6d3ec96db18ca34736ef6d"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "89c8e6c223e3e6be0ac21dd472733cd9"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "cf203197f93f3e7090201d7d8f5df2c7"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cfec215a36937a5b10c6976126f15eaf"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "3ed09bc6884c4016d875b84aff73537c"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e57133568be6810dc7b75ec698ffd40b"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "2838fa91f3dcd897e9a57416ace7271b"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f1984ead10e178fdb2ce897e9c70fb69"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "8c8c01c1b5e2dae5220f81f3eab8d84b"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4a11605e4d0347b59a3f6e57bd19aadc"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "e48e15cbc99643fcacacafbea114cdb7"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9b12f9f3e3ecda1d0410bff99d320f5a"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "e53a7cb60d5ada8921afdcc3dc99912c"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 197001216,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
32064,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 197001216,
"byteOffset": 0
}
],
"md5sum": "2794efed4f90da2679835636c49453c1"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c3483af8dc05e420e7ac88497c12e687"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33245184,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14604288
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14610432
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27193344
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27586560
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28372992
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28379136
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33097728
}
],
"md5sum": "298bd88a3dc9bd82aaad66e7d429b6d4"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "82cea4138f70c3e288551cccce0abfe1"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "99b9996b9ea9765cc77e83a575c2e687"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4eb62b974f18d05ac126361b76605855"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "e3b164fef67391d847b5801195eeca83"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "90368299f3ef0852bb7b36aa1d0785b9"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "f39c3ccbc411cb24e5a04c5b3b1fce3c"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "609eb5b22ec8e52c4a9077acc50623a4"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "102665d0dea49c633f278e0f2850da1b"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "60810dda62252b577eec03e7670dfc8f"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "ca75bf18d1fb47bbabd3cf24bcbcc6f7"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "40eeb8dbb5e286b0918095b08f8f7d5b"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "8eb599eb34cea16daccd54398b92b895"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "edd7b4d21047012c873b03a5bef3ac78"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "04f513642ed9ec608849ac611038e49e"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0366418f757dc1114218ef2d44502219"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "38a7f923f036bb2e87fce2a7dd078c09"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5b2364266696874048e33ee642c111dd"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "bb1bb07eada8bec0595c5efe50ac8eed"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0aada696957e209ef6f14abb162a42c8"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "b0b82783a9ce16fb6a593a987c2fda36"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "695940a50f249c0bdb56a712f47700d7"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "fd485e50285b69feae8468f8db432750"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d867f7ac55b9e061b3e9393d9381374f"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "0c11c5b356db2871cd28f0e76bf7ac01"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9e6d37f163d8c5ca62a9d14f444829e8"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 20250624,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 14598144
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 15384576
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 20103168
}
],
"md5sum": "7d21e060f06ba21a79903f0ac9c332b5"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f019120983eb8729e970a3493e9a52b5"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "e2da567ea9c4518e2821208a3e0570cd"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e4b7ea01c4d96dd2356b825a0541ad4b"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "0892a5a295f5c24dedd8d3bfc6096e10"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "628dac82bf6ed95b24d369512d2d93e8"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "1727dfeaa1fc121fd0d8f5d8a20545ab"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "901504385f4fbd7de458667fa7d350c1"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "7a71c3ccebf573f770d8797f0192ab4e"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f4939c0908ebefe2b01d5ec423e0ca13"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "c691f65e4fc08125f52b0433729fee77"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "556e6d7e9c28af21609c9b8110db9162"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "349678264e8b868fbdb9a895629992fc"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fe3cd78c182582c45726c551e461c434"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "1b0467b756f37d1ccf7fe489da168ad5"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 14598144,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
}
],
"md5sum": "bae445579028850e0937547896a6e378"
}
]
}