{ "metadata": { "ParamSize": 507, "ParamBytes": 5199330304.0, "BitsPerParam": 4.50075370326778 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 458752000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 256000, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 458752000, "byteOffset": 0 } ], "md5sum": "d3b171d3cc512e5ec59a53628257ac71" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 57344000, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 256000, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344000, "byteOffset": 0 } ], "md5sum": "28b93f5bfd51a8afbf5366f35963ee51" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "f8a46c2ab2f7e71e1ef9c3ee371316d6" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 28908544, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 7168 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 25697280 } ], "md5sum": "06036c78d4b9e3f2f6fcebd91e66e860" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "906f39752d61ec260747e6e16f1ce516" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.0.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.0.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "f987af1628d2e8dd52ea49bee47142d3" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "5e4fa5f7acd1e136d95d60294272c8b5" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.1.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.1.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "a3a6c6757cc271f4ff5db6640b043a91" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "e31e28081c7422029c6a6aec5f0ce483" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "8c60dfaa73d934a25bcae3938535df1e" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "97ff90e543ac8a1bfab6d44d9ff23f27" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.2.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.2.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "aff72d1284b4c7065984ade880bd9cac" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "d8ac9eb2c5493d91e74ea42eea78c078" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.3.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.3.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "1c0e925f03d868fc46b1cdfd0568dcba" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "45463833779de216919efb9264e9f7f1" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "5e66c7d94da805e2345093f8114f19d1" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "916f258877a6cc5bd9fd7661462864cb" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.4.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.4.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "b7284c5d4df97397ad1d5f12e7ef03fc" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "a2c3c18f75ec221fdc28b9a0aa7028c5" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.5.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.5.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "e3e0a784f02c0818d7c0bfb7f36b665a" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "9e993542e404a7c3576c31aca4f14eff" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "3d118f8be4bdf3428dd5f22bfa641f6e" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "9fe204608b23b6e4eba1dc04c7116f7a" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 31216640, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.6.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.6.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 } ], "md5sum": "36c50e7f9d2a1d7478b0c96fa571e16e" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "95c10e1a2f56ada9c471403abc4d4ee6" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31202304, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6422528 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21102592 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22937600 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30277632 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31195136 } ], "md5sum": "d64edb751390dc0d533fbeb84197dd05" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "371bb5399580fbdca1ea1577a3e0e72b" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.10.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.10.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "4b481061984454075edd38a86cf46a15" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "f2ef218d512787aeadce8eae083d9b33" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "d03bcf2a4554b2fa61f63c4c78d43b70" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "aea95b85aaa20537d9f0d944ee1e2b80" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.11.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.11.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "add7fe71148bbbfbd26e10f0a81ee92e" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "23ecbaba6376b2193050397059394570" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.12.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.12.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "e0c4e8bdf848f0b87a8697291ef015b6" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "163a1e2df358cee0665cb9e57a5a6c61" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "f4da1a1a242f54d8a38e1b55ca9ea5fb" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "92c00541df0147d61d45dc2986ea6190" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.13.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.13.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "b522850d5fd5f673dffbc05a00451485" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "9fb1cc4fd7a00ada55cbbb560e7c65b0" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.14.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.14.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "4e7738e6ee7251870316de734d167689" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "cd12e921b48cbd688b07bc54709b8b83" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "a56c7b4369ad2d832adc91500e592b42" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "20e5692af90110900ebbfb489310b665" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.15.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.15.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "f74e0f38b2183b3ef60d21d0fe3ae93a" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "6594ad3a46309e8aebd9ccf79a7a957a" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.16.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.16.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "39e9e89a08cdcbfa9a3518906e0cc9ca" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "720b891234d897021909fbc3d141b646" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "8d335eb31d1e8a728f8fe9163fcab066" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "2b439e42ad5a6f4839727e583eb8df49" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.17.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.17.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "f6827e6f4d875afdbd35af1e4f629412" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "6c7939a4c25f05a2cc5be0234a061f7b" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.18.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.18.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "2fa35aef227ca48432986d5507bd681c" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "2f8336b59644b695ee87ef143b79b9fe" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "fb417330890a2437a72a942cd86c16ba" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 31216640, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.19.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.19.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 } ], "md5sum": "ec0aa90efc130755daa6789557d268cf" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "e7eb383c4dabd9be1b93bbb5efb0e2b4" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "9dd8e67f22b2ceb296e85a9aa232b55c" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "4cf90ce687b4fb3d4ea7b3f90e4023fc" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 31230976, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 14680064 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 16515072 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 23855104 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 24772608 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 24779776 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 27991040 }, { "name": "model.layers.7.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 27998208 }, { "name": "model.layers.7.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 28005376 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 28012544 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 28019712 } ], "md5sum": "6d80206aa980136c92ebbd3b262570fb" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "29c0645b7e6f5ab8c46e6a8f901967b0" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.8.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.8.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "2b2462c1a81cb85a9d36e8030abe202f" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "eafdf22af0bfe6075a1369c6764c87a1" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.9.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.9.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "d3973c5e603a0a9bafa636121f309d53" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "f08a1485be3cf283a4dbbb5d1d48d26b" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "0ed5c6afaddea52265cba1f91185973c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 32141312, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.20.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.20.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6444032 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 6451200 } ], "md5sum": "ad6db960abca0cacff77cc0aca545478" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "99d4e9909ede3f88d38b423008738621" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.21.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.21.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "bb28c4e79b1318b64b64d39e8325a48b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "5bbdaa043e1af8b5ac85678d1703a758" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "2d1bda4fb7ad4d3620c80cd4df5ce2bc" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "06299f3292e09893a5b11f687b3e16eb" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.22.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.22.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "3c1337273e79bc89972d3c4c9e786f7c" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "54baf2c8857cad9087a4ee505675722e" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.23.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.23.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "cf6e62a73a37911c689aa0a11fa74c27" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "bbee19f4ebfe7ef89a5b8db5dc114466" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "ac63c108a0f62a35949c3a826ef3a1d0" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "bcb127c6ffd065f3f1ea7cfa80354c19" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.24.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.24.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "94d6c1c7b049097d7a4c19a948af97a3" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "ed4c6b924526344a508b75bc127a7574" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.25.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.25.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "2b92f0f769204ff891a5394a1e0369c1" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "1fff2398d02b8d70f1b44e87c932e3de" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "c9f03e2a737f75b71b62638dd66250b2" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "0e5046e5521330eb7659c33c011e9063" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.26.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.26.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "60e82d5a37b635728134cb1bd5e39832" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "b0427232287f0265be39c651c51ed2b9" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.27.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.27.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "d8021aeeb4ec902d0f0878bad635b3a6" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "dc53d7ea9dc19e96b978fb41197ff277" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "1348b2d309d7aa35e19c1dc98f2eef51" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "22689ade5eca450339ae62c089e32b64" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.28.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.28.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "0e5a68b92b3c95cf22c3ffd1c2b661fc" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "c116da98bfc9e6c0a7c44bcf11a81f4b" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.29.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.29.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "80f7787cc7c3b2be9ac6c51c0057445c" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "7b3a167159d581b19e98ea9708385e7e" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "af6d35c77d262d674c1ddf12a638863f" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "5ca9431e01052921e3ec7b48cbcc2ece" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.30.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.30.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "c0de772e7dd7077421739304c6af9cfb" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "4b993c99435db63e2bff9fe008bc771a" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.31.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.31.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "55a8e8c7ff466fe850c34ffe70a470fd" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "2c5610c50e749b9d29d74e306e8222a7" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "e66305c5365d18b9fa74b4b1a20a2c55" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 32119808, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 917504 }, { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 7340032 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22020096 }, { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 23855104 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 31195136 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 32112640 } ], "md5sum": "27618554ec3aaca397c0b47f6e623dcd" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "3ab5ef61a55efd0279d751404adf19c2" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 32141312, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3211264 }, { "name": "model.layers.32.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3218432 }, { "name": "model.layers.32.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3225600 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 3232768 }, { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 3239936 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 28930048 } ], "md5sum": "27cdc7335ce7f16a694587a714da39b8" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "ae7e19107b03a6dba44fe2b45c39291c" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.33.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.33.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "8e5c98e7aa4e25db4753e4097d49bb63" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "d1d3fccecaa5c5368e6467d61ba7b179" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.34.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.34.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "c4654685a935251adec5881d289f5cbb" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "0e5b9c2e1d526a24f045688190e5a4e6" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "2f1b4011c4966e3f10b5a9eb8f4665df" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "3f4228324f40ffab79f9748876e90f14" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.35.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.35.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "00b402a90cd0c5a76c26b9dd89871d8d" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "3303e8d257006abaed990487f45ea8aa" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.36.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.36.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "e4dcba05ac763bcf324f6ca49a2a046e" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "15dd5845e2247f0447bb1011c956d36e" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "1770cbf77aa14a70eaba2809d1fb230c" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "1b7da1fc51b78ac2f9e62463a7a3a76b" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.37.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.37.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "dd3919b05cef8d32ee7f42e9a1ed8806" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "0240ec08aef9c8254cbe8040c09d6c99" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.38.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.38.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "26326ddfd830ead502227194b2b7915a" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "0298d1c5f1741eb12aa7539b49e5d822" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "38d6304590515cb5e7aa5f7f9ab2835e" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "2ae007d7eb21a12bbc197f30da749d0d" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.39.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.39.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "c98f7316c5c4be975be68af20fc4350a" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "b357078392d4b5772e38a1eb8b65ffc6" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 33510400, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 0 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 3211264 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9633792 }, { "name": "model.layers.40.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9640960 }, { "name": "model.layers.40.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 9648128 }, { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9655296 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 24335360 }, { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 26170368 } ], "md5sum": "0ee17e53775e75626769947781012409" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 51380224, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 28672, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 51380224, "byteOffset": 0 } ], "md5sum": "d4906be588326782050c14064f7d5241" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 29826048, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 917504 }, { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 3584, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 924672 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 3584, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3211264, "byteOffset": 26614784 } ], "md5sum": "10321cd25d0e2edf534c1eb6925c4006" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 31223808, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 28672, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6422528 }, { "name": "model.layers.41.post_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6429696 }, { "name": "model.layers.41.pre_feedforward_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 6436864 }, { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 8192, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 6444032 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 8192, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 21124096 }, { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 3584, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 22959104 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 3584, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 30299136 }, { "name": "model.norm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 31216640 } ], "md5sum": "3b60bd678385798fbc2981a86c32ec64" } ] }