numen-tech's picture
Add weights
df5211c
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 1970540544.0,
"BitsPerParam": 4.065377299522369
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "compressed-shard",
"nbytes": 49250304,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32064,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 49250304,
"byteOffset": 0
}
],
"md5sum": "eb6c6a847fbfd1162066cadde20450ea"
},
{
"dataPath": "params_shard_1.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5a6c20a7a3a52aa726c286a3b6893aaf"
},
{
"dataPath": "params_shard_2.bin",
"format": "compressed-shard",
"nbytes": 33168384,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32064,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1539072,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 1539072
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 1545216
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14128128
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14521344
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14527488
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14533632
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27116544
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27509760
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28296192
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28302336
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33020928
}
],
"md5sum": "04f8bf09d2af9918b7df89b4edd21197"
},
{
"dataPath": "params_shard_3.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d0a8e4c0b9c12960a589676812dc848a"
},
{
"dataPath": "params_shard_4.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "3267f29e7482663ef0619967060a06b5"
},
{
"dataPath": "params_shard_5.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "324ee43533eb5b47c7c79b9defc623c0"
},
{
"dataPath": "params_shard_6.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "6e47b938d5f649b8b79e7fbf88e486d8"
},
{
"dataPath": "params_shard_7.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0ef6d1599484d65121b786cc53521e8e"
},
{
"dataPath": "params_shard_8.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "289061b98404c9abec8f91c9251d3072"
},
{
"dataPath": "params_shard_9.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "278b3f3a19b2d42e9b9f41ffd29d82e5"
},
{
"dataPath": "params_shard_10.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "7c5a2401e6b50b14c34530a3770ceb7a"
},
{
"dataPath": "params_shard_11.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "26fa9208e19337b3f57fb900f021fe19"
},
{
"dataPath": "params_shard_12.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "e3b3a486e7e322ac7da2486342ce3a48"
},
{
"dataPath": "params_shard_13.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e6474757b726f342cf4ecd4cc3012ff6"
},
{
"dataPath": "params_shard_14.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "f3738d8a47e8264cf84db7605cecae6b"
},
{
"dataPath": "params_shard_15.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3c5d38dff4cafe22256be044e955b63a"
},
{
"dataPath": "params_shard_16.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "37af502bc0ffcd65dbd2c7a0f406fbd9"
},
{
"dataPath": "params_shard_17.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e5dfd707c17d916c784066518e69ad02"
},
{
"dataPath": "params_shard_18.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "d17aec96f008a12993552c8d7d71969b"
},
{
"dataPath": "params_shard_19.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ba2e3ea6a8059582b9f0934342978a58"
},
{
"dataPath": "params_shard_20.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "699613dda6f0fe135194b4578318776d"
},
{
"dataPath": "params_shard_21.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "658fc2482db51babf8b021661117444a"
},
{
"dataPath": "params_shard_22.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "b387fb00bae80f7dbb8a7da706b35f0c"
},
{
"dataPath": "params_shard_23.bin",
"format": "compressed-shard",
"nbytes": 49250304,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32064,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 49250304,
"byteOffset": 0
}
],
"md5sum": "f02ac04b7253cb42f68bf29676b2ad61"
},
{
"dataPath": "params_shard_24.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ecd23c123f7b1c0a2ea15716c9fe645a"
},
{
"dataPath": "params_shard_25.bin",
"format": "compressed-shard",
"nbytes": 29918208,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32064,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1539072,
"byteOffset": 14604288
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 16143360
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16149504
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 28732416
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 29125632
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 29912064
}
],
"md5sum": "5cfa6cf8db84c96c8451a877ed6b132f"
},
{
"dataPath": "params_shard_26.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1327ccf0b27d40726a2a4b05ea217d3d"
},
{
"dataPath": "params_shard_27.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "80a2041d77a3a10fbe1ae6a417c41cec"
},
{
"dataPath": "params_shard_28.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c4543861c7594597845249e6024ce611"
},
{
"dataPath": "params_shard_29.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "b3fd82de236c17f7c1461f2316bb50cf"
},
{
"dataPath": "params_shard_30.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b7b4f7a768891add157e66dd2d80f10d"
},
{
"dataPath": "params_shard_31.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "36a9942dd3d5cb4d0da338e95415a337"
},
{
"dataPath": "params_shard_32.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "546bacf3a84749c40fbd6fdd196ee82d"
},
{
"dataPath": "params_shard_33.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "97ee9ffe41d5dc61fe03a22550c25e14"
},
{
"dataPath": "params_shard_34.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "76443f5ab2569d636a70c99b8d230557"
},
{
"dataPath": "params_shard_35.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "5570358fa56ffee02f8a38fce5605bef"
},
{
"dataPath": "params_shard_36.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3498da9e7aa57567fef5d6c5511a99fa"
},
{
"dataPath": "params_shard_37.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "73ee37dbf2be510ee2671d2f078d4001"
},
{
"dataPath": "params_shard_38.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "60980736534d9a273d12f6f54e5d9852"
},
{
"dataPath": "params_shard_39.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "6824f6a6f2dc85083a173c69d94d3a45"
},
{
"dataPath": "params_shard_40.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "50d4bbb0e029eeeb4631629f6f82ef77"
},
{
"dataPath": "params_shard_41.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "9af2a2c272f4847d7042034ca87fc2f3"
},
{
"dataPath": "params_shard_42.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cfaa75ebbe74e4d44ed5860dca67ab85"
},
{
"dataPath": "params_shard_43.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "55664ac3a81a159147293599e2143f0c"
},
{
"dataPath": "params_shard_44.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "795fe155723c6e6b303a38276139d5a4"
},
{
"dataPath": "params_shard_45.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "697979e9f22919e556f1c1767b777090"
},
{
"dataPath": "params_shard_46.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "92c41702d1a27cfd5413b709130b0f65"
},
{
"dataPath": "params_shard_47.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "ffce584e4e06ff7995aadd464ec9517c"
},
{
"dataPath": "params_shard_48.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ebd717793f5ff9f9fdf7fbdc20c8bde1"
},
{
"dataPath": "params_shard_49.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19464192
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 19470336
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32053248
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32446464
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33232896
}
],
"md5sum": "46342309ceda3dcbada8249b01a960cb"
},
{
"dataPath": "params_shard_50.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a1200557e9f66f5933f6160c4f04d14f"
},
{
"dataPath": "params_shard_51.bin",
"format": "compressed-shard",
"nbytes": 25116672,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 4866048
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19021824
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 19464192
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 20250624
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 24969216
}
],
"md5sum": "c35d7c187ba746eb6d417b228dd58791"
},
{
"dataPath": "params_shard_52.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "51bf4fe0a7a5364129dbc090c1aefc6c"
},
{
"dataPath": "params_shard_53.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "37f714b4329b775a252ddd90c915c050"
},
{
"dataPath": "params_shard_54.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6970fa2d6083948b5260c93efde0f0ab"
},
{
"dataPath": "params_shard_55.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "1e5561efb495ca6b5a3c008a9fe76448"
},
{
"dataPath": "params_shard_56.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "12686e6735769e9d38a6475da606b34f"
},
{
"dataPath": "params_shard_57.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "b3dcbc2bc1fce6afd07a096c19054ab4"
},
{
"dataPath": "params_shard_58.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b8445fd0dd0a24aa10e2668c91a9209d"
},
{
"dataPath": "params_shard_59.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "fdf4c04b457cbe027b645d07a1cb61ac"
},
{
"dataPath": "params_shard_60.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5266e32e8571ce1b29a9490e44000399"
},
{
"dataPath": "params_shard_61.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "a7a2fdc1efa6bdc3e8309c9f44ee9540"
},
{
"dataPath": "params_shard_62.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5812dc0fe676983b77c6b6bb61fa80f2"
},
{
"dataPath": "params_shard_63.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "d4c2509aba9f77c24c8cb923397a515a"
},
{
"dataPath": "params_shard_64.bin",
"format": "compressed-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
16384,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "567ca9d87204922b24bc0398ba218365"
},
{
"dataPath": "params_shard_65.bin",
"format": "compressed-shard",
"nbytes": 33239040,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14598144
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3072,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 14604288
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3072,
64
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 27187200
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
16384,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 27580416
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28366848
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3072,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28372992
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3072,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 33091584
}
],
"md5sum": "e0fd8ae41c165f3427fbe2af1ede03cd"
},
{
"dataPath": "params_shard_66.bin",
"format": "compressed-shard",
"nbytes": 14598144,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
9216,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
9216,
24
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14155776
}
],
"md5sum": "107303b26fb0af34df9916e4a2301fe1"
}
]
}