{ "metadata": { "ParamSize": 163, "ParamBytes": 563140608.0, "BitsPerParam": 3.645470439574098 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 106708992, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 106708992, "byteOffset": 0 } ], "md5sum": "257714dceda34f70d621238d1f1b8ba3" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 20899840, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 13338624, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 13338624 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 13342720 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 20060160 } ], "md5sum": "9efd2b317e92ef5a929f38faa3d5a397" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "29f5de175ccab5c13fbd22eeff323da1" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "673df4e18394490f3281c89ee508fd18" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "4d7fc20cafadf378e7f47587d4f0ee02" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "6cc943aa07d22c2be9d26b96dbb5ad0a" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "29bf9b1027af7ac0c717faa8bea77158" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "279dda55b5c085fd9743e56506cfa870" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "27466a76b8a536dba897b474bbc84979" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "0c5d6fd2c0a7ce2bcd44f0bc25257ba6" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "522b5033a4cf049519c21d6fe8bb5f2d" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "d06d6a1d6c25551e9d3720193e406e5a" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "a72ce9b2e8e9ab9648513539d18ddae5" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "f80c3c0699a7cd36b4ba0a1f9ae9ca1d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "a60632f6969509e8a599486de92df73e" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "e0117e8aa6980190d0493920a652d700" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 27693056, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 2048, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6717440, "byteOffset": 20135936 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 2048, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 839680, "byteOffset": 26853376 } ], "md5sum": "a3e069b238ab76dba0a83c5dd04c616b" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 20135936, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 16384, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13631488, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 16384, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13631488 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 15335424 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 3072, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2555904, "byteOffset": 15339520 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 3072, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 319488, "byteOffset": 17895424 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2048, 208 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18214912 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2048, 52 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 19918848 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 20131840 } ], "md5sum": "e0ffa10cdcdb2e5b0c14f8ac2bfb6ae1" } ] }