{ "metadata": { "ParamSize": 303, "ParamBytes": 69390720.0, "BitsPerParam": 4.12686857960117 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 33301248, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 49152, 72 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "model.embed_tokens.q_scale", "shape": [ 49152, 18 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 15925248 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 15926400 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 16368768 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 16369920 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17254656 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 17260800 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 17261952 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 17538432 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 17540352 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 17706240 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 17707392 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 17708544 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 18150912 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 18152064 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19036800 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 19042944 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 19044096 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 19320576 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 19322496 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 19488384 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 19489536 }, { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 19490688 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 19933056 }, { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 19934208 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 20818944 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 20825088 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 20826240 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 21102720 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 21104640 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 21270528 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 21271680 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 21272832 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 21715200 }, { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 21716352 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 22601088 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 22607232 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 22608384 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 22884864 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 22886784 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 23052672 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 23053824 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 23054976 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 23497344 }, { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 23498496 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 24383232 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 24389376 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 24390528 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 24667008 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 24668928 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 24834816 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 24835968 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 24837120 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 25279488 }, { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 25280640 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26165376 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 26171520 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 26172672 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 26449152 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 26451072 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 26616960 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 26618112 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 26619264 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 27061632 }, { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 27062784 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 27947520 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 27953664 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 27954816 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 28231296 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 28233216 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 28399104 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 28400256 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 28401408 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 28843776 }, { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 28844928 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 29729664 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 29735808 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 29736960 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 30013440 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 30015360 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 30181248 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 30182400 }, { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 30183552 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 30625920 }, { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 30627072 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31511808 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 31517952 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 31519104 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 31795584 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 31797504 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 31963392 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 31964544 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 31965696 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 32408064 }, { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 32409216 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33293952 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 33300096 } ], "md5sum": "8c27c65a609e64ab75b3bbf527046f95" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 32968704, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 276480 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 278400 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 444288 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 445440 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 446592 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 888960 }, { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 890112 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 1774848 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 1780992 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 1782144 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 2058624 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 2060544 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 2226432 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 2227584 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 2228736 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 2671104 }, { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 2672256 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3556992 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 3563136 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 3564288 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 3840768 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 3842688 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 4008576 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 4009728 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 4010880 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 4453248 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 4454400 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 5339136 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 5345280 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 5346432 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 5622912 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 5624832 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 5790720 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 5791872 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 5793024 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 6235392 }, { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 6236544 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 7121280 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 7127424 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 7128576 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 7405056 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 7406976 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 7572864 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 7574016 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 7575168 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 8017536 }, { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 8018688 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 8903424 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 8909568 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 8910720 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 9187200 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 9189120 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 9355008 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 9356160 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 9357312 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 9799680 }, { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 9800832 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 10685568 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 10691712 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 10692864 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 10969344 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 10971264 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 11137152 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 11138304 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 11139456 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 11581824 }, { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 11582976 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12467712 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 12473856 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 12475008 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 12751488 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 12753408 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 12919296 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 12920448 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 12921600 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 13363968 }, { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 13365120 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14249856 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 14256000 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 14257152 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 14533632 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 14535552 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 14701440 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 14702592 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 14703744 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 15146112 }, { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 15147264 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 16032000 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 16038144 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 16039296 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 16315776 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 16317696 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 16483584 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 16484736 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 16485888 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 16928256 }, { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 16929408 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17814144 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 17820288 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 17821440 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 18097920 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 18099840 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 18265728 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 18266880 }, { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 18268032 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 18710400 }, { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 18711552 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19596288 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 19602432 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 19603584 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 19880064 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 19881984 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 20047872 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 20049024 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 20050176 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 20492544 }, { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 20493696 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21378432 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 21384576 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 21385728 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 21662208 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 21664128 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 21830016 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 21831168 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 21832320 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 22274688 }, { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 22275840 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23160576 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 23166720 }, { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 23167872 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 23444352 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 23446272 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 23612160 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 23613312 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 23614464 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 24056832 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 24057984 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 24942720 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 24948864 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 24950016 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 25226496 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 25228416 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 25394304 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 25395456 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 25396608 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 25838976 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 25840128 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26724864 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 26731008 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 26732160 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 27008640 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 27010560 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 27176448 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 27177600 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 27178752 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 27621120 }, { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 27622272 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28507008 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 28513152 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 28514304 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 28790784 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 28792704 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 28958592 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 28959744 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 28960896 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 29403264 }, { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 29404416 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 30289152 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 30295296 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 30296448 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 30572928 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 30574848 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 30740736 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 30741888 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 30743040 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 31185408 }, { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 31186560 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32071296 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 32077440 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 32078592 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 32355072 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 32356992 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 32522880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 32524032 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 32525184 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 32967552 } ], "md5sum": "754c516b6e9a414726e16798fa94ad36" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 3120768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 884736 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 890880 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 892032 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 1168512 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 1170432 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 1336320 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 1337472 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1536, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 442368, "byteOffset": 1338624 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 1780992 }, { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 576, 1536 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 884736, "byteOffset": 1782144 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 1, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 2666880 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 2673024 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 576, 480 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 276480, "byteOffset": 2674176 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 1, 960 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1920, "byteOffset": 2950656 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 576, 288 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 165888, "byteOffset": 2952576 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1, 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 3118464 }, { "name": "model.norm.weight", "shape": [ 576 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1152, "byteOffset": 3119616 } ], "md5sum": "1641080b39b88ab1a28525e1a66ddedf" } ] }