CodeLlama-34b-Instruct-hf-q4f32_1-MLC / ndarray-cache-b16.json
CharlieFRuan's picture
Initial commit
0f9f82a verified
{
"metadata": {
"ParamSize": 485,
"ParamBytes": 21092663296.0,
"BitsPerParam": 5.000635812792825
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 131072000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 131072000,
"byteOffset": 0
}
],
"md5sum": "51c289afd9bb1224b7bcd4e0a2e62729"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "fac0b594cf401f15e4136b8202972c6f"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "01c0e66dd0e0da43434dcf6812f9b0e7"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 27705344,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32000,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384000,
"byteOffset": 0
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16384000
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 16400384
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 27672576
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 27688960
}
],
"md5sum": "5ddffcf244c3147b65daa87b503af5f5"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d4c2e3b257caa36627d1ae752cd90db4"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "222c18f10d289eb8564687c03b1eead6"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.42.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b2509047cc2145aef50c4e9effbb9284"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b559daf0394320091d2f849c28d57f4f"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8cc21e276d0dc8a910f2163da0718ff0"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "9150aac827b0799502534a00aa200ef0"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "90937e2fb8bb1612579af5e6e9afff6d"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ce1d631c44cb5e5b90a08366597896ca"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.42.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "9c014a17bc2c69a74954cb2b19777f21"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7c593465fee733873a8972554a0f4a23"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "1a88ccc4baadf1a4fa6319e617914ef9"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "670509b9c97496ed016b0d0a5d252449"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "39fabe2c60cee5a4d0aca64cc82685ab"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.44.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "93f54305b1211bf7cebaf89f4fe1eeeb"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "170148ad68bca3d11926ef5ed759518c"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "4058d608c801ccef2b2b21aac799bcf3"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.43.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.44.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "acb9023c360beaccb07fa7c900a582ed"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "0ca7b4fd9d7ca72e024537c8a635842d"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "27989a1dee7cc179b29cfae71acef416"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.45.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c55e78f0b50d16827534c4a0d02fe5a2"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e9fb0e5e3a81a5001a673bb00573ae41"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "79978c3005c205040cad0a035794740c"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "27d6b988aa0ae5a10e120f5aecaea60c"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "fb9bf20c6563a640d2bd43e1e71b61bd"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6abe2d2c675178e57adc58c2cee85beb"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.45.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "d7869b0af78202ef138f8a8ca4731824"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "86cb33e044276bd29a6cb8a14eda2cd8"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8853959bbc6c5e40840e638be5a0a229"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "742fada0d490595dbd55f3474b38017e"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "39adad76a4b70299c7de8b5d587a58fe"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.47.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e5308eca49ef12cc04959e907c2c90f9"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "765a1a37129049f99a74e24fab726072"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 131072000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 131072000,
"byteOffset": 0
}
],
"md5sum": "6ffec9aa4b64ea12346ce7ff8cefc502"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.46.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.47.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.norm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "bbce586412801428b53a715f0347d9b4"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "20447ad46bdb295611e41a21e4b6dab5"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d9d5d80f9030bc72421a1fdea0094cb9"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "4607cd8068931a1cf786371702a77b46"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8b8b9a4acf5b98a9640bb0b1a9370cc8"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8db07262d8523745176138d0a9daca91"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 32931840,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384000,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 16384000
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 16400384
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 27672576
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 27688960
}
],
"md5sum": "da0f7f86165ea3fe362939c74b284b23"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "a942d1f9f092672fffb82fc4d2d7154f"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "6171366bb13395fa365cae3aa23d0af1"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "45bf48e549d2307f0d9910485127b2c6"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8a60d426d986962eec02915ee9b29638"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9520f82d2ea0c0874e05e0613f679277"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "cbb34cdd3444dd24a4164d122e0cdcbe"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 24952832,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 4210688
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 15482880
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 15499264
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 20742144
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 24936448
}
],
"md5sum": "480be28e2b96b974833b376ca9c2b3ed"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "e80c44d63023fabeb68fa6b5cccbc65f"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "c3509e1f874adb8cb29c21c22f2b7d20"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "90661ff0aa11c8fc0fbd4e5b32221cc3"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b2fd0dfd56de5d877c23a85841363c87"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "6cff45883241c0aaba1b907d2d4dd7de"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "f06d1671307b4792b594b9564405c7ff"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "9fd76ca6a4619b30119dbc671111e44b"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ad279169b0407bee409a98a99ce606bd"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "17ea57e605e6b34e550f8bec34a9e2ad"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9a64dad17d7c68fba727bcd38831f1d9"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "78f4df090b1f6a8b95d12adb65b0f289"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "e13b970ea2eba03147e3d4421aedf0de"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "ac1f29649aa3bf044383051eb8b518e8"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "301df633322aa27069273fb9e32b658d"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "403d51c9b60939f9c32ed2132de3c121"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "46c0010a19d7600954946fdd166b3790"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "e87d341d25e2d16685c28c096367c853"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "99073cae51496f5f5667caf29be60fb8"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d13d342543642fbeacb4f6113d6371ac"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b8a8563f709ea96f5ef70c61f7907754"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a9c7846ea31ef612ebb0b539753a1b5f"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "586030e623ba8da8b0acc9dfc904a98c"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "fe2d1ebbba5538a94913495b8b9ea770"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a4520c0d263d1113bbfd66f6c26aab85"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f944ca5932bcf0948c554623c0762c8f"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "05c08640a2887aad56a9875672ceb716"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25968640
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30162944
}
],
"md5sum": "b5d02ff4796c05598385578cb1761d14"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "3d2a1e50775b74297730f27209c6e9f4"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f5d4177748c4624bf88220b1dcb32895"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "0eb2186235cbcade2beae2f840680e1c"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d5b5a74da0c51e50d19444c72b9e3221"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "64a51dc0dc45ebc29f477b97eaff6bfa"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "2b66649025f482d70ec48a6361620cd9"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "3339d50e963e2f3d10a34c9e1da53377"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c8942d99a3474348fae79d6d24889802"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "d4d528531839b78fecdb053808aef423"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e01d7b219f5bbdbd2d9c4346710a8b8a"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "2b7fc8c46585c4af1f2074db5206cddb"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d9f1a749fd4d5f64b17198dbe103ceb4"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "67c3be9dc9b54c16d72a3a9fe598dec7"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8af97d582d717ac1da97554f18bd32e3"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "be88631f5a513c73bb360d3aca3e5510"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "8ff9be5b7dc7205336b959428d32cd3d"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "69b095128caed3b629c5c28f5e278b74"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e6c9bf0f5f1aa8acc04270282537717b"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
}
],
"md5sum": "7be6a0efaf6f3df8f338bae33d148862"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9c010dc016a78d3b226cb19575074636"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "a64b1da3868428d40aff11012b925ec5"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "f4420f464ca22f6e8d550a865057e88e"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "a68c2f5c0e1d547f0683e9082f4c2f55"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "58e5e89449c36f45e32b5cdddefe5462"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2d7af53ab2686df0d6692b72f59b2da9"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "76e46dc43bc0b7e78b5c25723701fff3"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cfa66a77c8352ca107ba3e17e0f59192"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "7c974b606f9061878fd776a508e4c9f8"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "105a290d7866088bc54ce3a6fd7f5b6c"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "61488efc9fc8846a5b3103eec0f1b14a"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2dd7800534b7e11a32007a362036cef1"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "408c06f3d3e6cc1feca770b5a1d2b71e"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e9782cda2d14ce2c493aade04c8d58e1"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "b124566679a20ad9961f1d44b52f7e80"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "831e42789d775a5fab48975380bfb810"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f2174d80ef52605d40c66520deb7405a"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2f3b31d25d5baabd24bf09743dc41354"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "25022d4cb7f29cd400d9acc568dc1f90"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "3a5cb5e0b5b1e5afa15ab04182ddbe91"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "f6c4f2d2a8837c1e36f381d950098607"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32014336
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "60a7d777aa6b4aeedba3c095b2992487"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "12c4726f5a18b4a254dac7a7b8835590"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "0eff33136c8f371cce12c514d936eebf"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "82d1533796d6f5611c8bbe2a70de1fca"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "10dc8b9a048bf4835de857029f62447c"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "fb06fb11dc641cf6a7feaf74fb437c17"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "8cbb8a55a9ad6ce27df35e426a09ace9"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "953edc08d3d31d75b1c3dc0b4d58d171"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "6a9be1ebcbbde3544c0492ee558755f7"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "2d180bde7b621c476d2f43b0c47160f8"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "45d9ca1847beeaa611f5bdbb0a8b2421"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "3e88259421b2fac642af46875cac75e0"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "eb32a3648b74ce4d8a2131b4c19b75d8"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f8147a1d60290675dccb926407c10bf1"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "fdc9bc3c32666eb7bbfff0b3a6afce1a"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f30a48636f5b254c7b765c214cbaac5a"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "ec316084e13e44f57a7cdf42fcfadc90"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "e38b72f47dfc414d3858aad5173bd960"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "086ea592c4abe062288368197258cc94"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a8f911873c895e162b62b493973452a0"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "ae7695ee1ab8ab305bcab0264d6933fa"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cae8965ed1c978a693539674eafce3d4"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "dc3f4044d69ff36728269a73a024f423"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "ffe1fb4ad95b9c9708dc72ae958e75ea"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "08890436effc1f7bf64541c86fe871af"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "19fa149495e7fab7887ec39f39a80e6a"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "8936b243cfd6ef39f893542b7874f8e5"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "191f40b5f46285ff5cc50b2b17ebdb14"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "6f1de1d80995ed698295062c227f96b3"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "14e7700710a33faba8200d7d8e0bd04e"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "4a1de74f1fb499bd17aae0f09f8c4889"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "d76f620738ed66a47a1413c8df82226a"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5032145dd39299426bbfc0c9bf8cb330"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "6d38f94bfaf167a27ae89fdf508b3472"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "3ace85d9374c5186b90cd10d3a072853"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "be879c68ab66c100a92569e2c945a5fb"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
}
],
"md5sum": "dbfbe5959b7f3561f2ed0e3d1f1a9951"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "20aed52c1996c9f204938181b3dceee3"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "58ce3a34170fb93ab86ea3d2aa729dbe"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "0262b29467caabd991c1bf0983ec6491"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "24c180907a301ebc962e72576287e694"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "4d8164bd2ba44a198abb350ccb4204be"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "bb01c7587518862995a32401a12a08a6"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "ac1f386e03f79b282f4fb5c168d03033"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "4b84d1f342363b3649e8c064e09f243e"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "6ec0bdf4fc155f845b4d699b14c0af6d"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "988126b28ca872fe3aa3760c7dbad17b"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2f93fdbaf87dd05adf1b98ae9df949e1"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "1d39ac33c1ee82ab03f7486a184dbb6c"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c33be460d3c492b4d58e5b45622062d6"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "09a9894c025905e44535dd5eb96b67c0"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "7d79dd0409ad7f6d7bcf016546d8241f"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "4743faa2e1528b03d43b927c787658f1"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "ae81cd9a0217584b18125cf702440868"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "2ad313d41a074078137068049c09c02c"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "726a681b652425537e23c04f74d7d803"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "f5cf4a750dfe9210a7b35d79194d877f"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "316d32afcce008e1423770c761e4dc3f"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "915b0cc669b30652108e987b5578a360"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "af3232587448232c98b80017bdbe31e9"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "9ff5b348bf6214d0bbf598add0eba907"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fdddef8e6736cc85d197976cfc18e57a"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "9de93485ae651a3efe0b8d5537c5c6f0"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "1c8101007d1ca2303506167fa3fbfbb4"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f960367f9c7456fc87149e6dcc52e8f7"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "735269649640623a359515ded69a38fa"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1ba000ee946c8dc0a53dac1a59e1c2a8"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "56571ee9fce4672df5883fb2093b4975"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "4aa701af11ffb5689b78e5f9aa64b568"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "008a2de95ba2cd104e582775070d192e"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "45974b446b9d1fde9076c6bd7866f772"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a5e28ef3fbe3a3295adb229be2bd4c84"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e4d0257fffc384d664a5090fab885b1a"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "418a99b13d1259bdc2033f6f66fdb77a"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "acf0afc06ef8cb3989dee34a2f1d69a5"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "548ea58d131f6b6c468a537f625053c4"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1b2652efd4f1f8cc7ec3cf87dc9ea221"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "d343c13ed18f417c9c547ec2f4de8565"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 30179328,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25968640
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30162944
}
],
"md5sum": "791c59e363c4461b0292926989f2a87f"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "46991ba4e5971e5659223e9f15482841"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "af1ab61408381d31679ae9b4d1a5673f"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "b77832a4c63ba3feef59afccda0e065f"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b32b36a3963f8d435affbe3786d61467"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "99a22fd8fe240ca9472265d31815e47c"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "506f3c1dd398acab938ddfe5d83c07d6"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11288576
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 11304960
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 22577152
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 22593536
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 27836416
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "ff2a4986dae7c9180ee33ada2876fddb"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "b239b77b6e66d3ce9ffc95a682e4db46"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "91da5dd56cf7fef88e8154761f256d74"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a1e45abb3551477b4e5b892a3e06b8fc"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1bbde9a6c3daed56707d59d26ff75287"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "0308901564a349f966c78978645bbe4d"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "d1e90e3b3578872cf689284a35ef6a6a"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "8c028f2de4e7da9ae35527b6ff1edabd"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b487ddc055b977d27b9783f92ac4e9f3"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "a55545b805a73cbd7666f0f5679e7ad0"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "058f114b8620d5708517dc72762ddf5d"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "00a5ae011f6015555e9e715d3542743c"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "a0f0347aa5c20a3406f709a48b6e7ebd"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5cef43a1b10c6188645d63ab3c328f34"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "434dc4407d7fd85865583c7b3c027084"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f0d5712929f954041ffd4b3d1db613f8"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "936b91e9da8f23031bce918efc6725df"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "bfe4e8b29580d52a810a46f255a145e1"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "59275eed62d2c2d448370b6b91561a21"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "a8e43c3bf9c2a9f618597cc1d9978d62"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "94d9ab055e331c223b0764ce42e7de02"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5be577502cd869f3b1b9a75d011f80cd"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "16b9cafce4eb75963bfbf7c8871b1990"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "21dee41c213d0ad5d90f31ac1b350f00"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2a3b0d7cfadafc7334fbc9a90a0447e5"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "62288327511dbc253eb866df9b84553c"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "7e2de06796d7290d80e93fa17caf523d"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5dbf676004b1e3209b7cd05e63182ec4"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "fcebe85bf2921111a393b9b9ed403b38"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "165350fafd5978a6845d548a5891e713"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 31981568,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 9437184
}
],
"md5sum": "48f95b4c6bd4da70e265bb4a1c7a653d"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f6ff9e7ce4d631b4ac72d523162e1f8c"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "f1eb1309ae0ec7def775873f5fbed23d"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "e566567ba6a607f5e7b91adcbd6d599f"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "c6b047fb09196ae846648071ccd00131"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "1c71657126abf1bab0137b12b6fbd27c"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "aadaf8594cb1ab9048488abdb1529074"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 32047104,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20742144
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20758528
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32030720
}
],
"md5sum": "741a481d703effec6badfea46a8fd13f"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7912ad7409bf8231d1d472b4e60c974a"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "58dc551ed9b14d1e7706b644a5bf62fb"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "bc174d96b59a75c517606cab00c239e2"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "d3a967e37015f6cb289a74051965a901"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "695ce59a551959c98d060e43ce36bb11"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3778821f059d548908bb7f2ae81807cc"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "63a719fd947f375236aebc9acd75b30a"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "1a7ef4c58d79fb6d0fa6ec460a69dce2"
},
{
"dataPath": "params_shard_249.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "975adb112c7413e03baf09847112fc2e"
},
{
"dataPath": "params_shard_250.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "06ed340f207725e96fa5b6098c8037fd"
},
{
"dataPath": "params_shard_251.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "dc6c82146d937a3739692d0ffe04af38"
},
{
"dataPath": "params_shard_252.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "631249db9749b2e41a74a12745f04602"
},
{
"dataPath": "params_shard_253.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "8d9de59684af45c6a28a901f5d215f84"
},
{
"dataPath": "params_shard_254.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "6a104822852202d9fbd65d48d6685984"
},
{
"dataPath": "params_shard_255.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "089175bb1d798a78f77da08b2295e0cb"
},
{
"dataPath": "params_shard_256.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "1944673beeb061b2ce10d600fa357940"
},
{
"dataPath": "params_shard_257.bin",
"format": "raw-shard",
"nbytes": 32030720,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 20742144
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 32014336
}
],
"md5sum": "600da115d162546c4b7fb944a7571075"
},
{
"dataPath": "params_shard_258.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "bc21cec0e4f6efe4cbc5f94ed7e1a81a"
},
{
"dataPath": "params_shard_259.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "4f3aaf5bb7eaf4dd6e948a56634a698e"
},
{
"dataPath": "params_shard_260.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "568d0e69483dc1210a1644bbc3a8f91d"
},
{
"dataPath": "params_shard_261.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "1b23a892827b6ee9f72477a253add733"
},
{
"dataPath": "params_shard_262.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "bc7716b3dd366258105902624f1b1d68"
},
{
"dataPath": "params_shard_263.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1cfa966eeb69c9ac0175e1861e443890"
},
{
"dataPath": "params_shard_264.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
8192,
2752
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "224dd9ca4c5ae414a232023b58b9c1c7"
},
{
"dataPath": "params_shard_265.bin",
"format": "raw-shard",
"nbytes": 30195712,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 5242880
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 9437184
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 9453568
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 20725760
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20742144
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25985024
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 30179328
}
],
"md5sum": "fea8329d781bb013be13164eaea76f25"
},
{
"dataPath": "params_shard_266.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "8f6a462902ea4af0c0d101fef55e0cb2"
},
{
"dataPath": "params_shard_267.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f383135de5ab88aa5b26f71516e1f171"
},
{
"dataPath": "params_shard_268.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.40.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "1c298100dfc9e0501f9cfbb0d59c8b3f"
},
{
"dataPath": "params_shard_269.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9128d8cd8b6b3768dea3278469fcb0d5"
},
{
"dataPath": "params_shard_270.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
44032,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "99f9d58e4a0c7265e1da5665cdc391bf"
},
{
"dataPath": "params_shard_271.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
44032,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "62c1719f9076ce87c281835a8cf0b76a"
},
{
"dataPath": "params_shard_272.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.41.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "1e415c963f9ba748c87a3713e98f5b4d"
},
{
"dataPath": "params_shard_273.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "337e10a8530dc8e4314fde7fdc8215d7"
},
{
"dataPath": "params_shard_274.bin",
"format": "raw-shard",
"nbytes": 30162944,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
8192,
688
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 11272192,
"byteOffset": 0
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
8192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 16384,
"byteOffset": 11272192
},
{
"name": "model.layers.40.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 11288576
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 16531456
},
{
"name": "model.layers.41.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 5242880,
"byteOffset": 20725760
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 4194304,
"byteOffset": 25968640
}
],
"md5sum": "6da908215a65ab1a6bd8ac50b0b7588a"
}
]
}