|
{ |
|
"metadata": { |
|
"ParamSize": 303, |
|
"ParamBytes": 69390720.0, |
|
"BitsPerParam": 4.12686857960117 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33301248, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
49152, |
|
72 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14155776, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
49152, |
|
18 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1769472, |
|
"byteOffset": 14155776 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 15925248 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 15926400 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 16368768 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 16369920 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 17254656 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 17260800 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 17261952 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 17538432 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 17540352 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 17706240 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 17707392 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 17708544 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 18150912 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 18152064 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19036800 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 19042944 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 19044096 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 19320576 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 19322496 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 19488384 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 19489536 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 19490688 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 19933056 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 19934208 |
|
}, |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 20818944 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 20825088 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 20826240 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 21102720 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 21104640 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 21270528 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 21271680 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 21272832 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 21715200 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 21716352 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 22601088 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 22607232 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 22608384 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 22884864 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 22886784 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 23052672 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 23053824 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 23054976 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 23497344 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 23498496 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 24383232 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 24389376 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 24390528 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 24667008 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 24668928 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 24834816 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 24835968 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 24837120 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 25279488 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 25280640 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 26165376 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 26171520 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 26172672 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 26449152 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 26451072 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 26616960 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 26618112 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 26619264 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 27061632 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 27062784 |
|
}, |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 27947520 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 27953664 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 27954816 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 28231296 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 28233216 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 28399104 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 28400256 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 28401408 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 28843776 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 28844928 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 29729664 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 29735808 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 29736960 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 30013440 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 30015360 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 30181248 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 30182400 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 30183552 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 30625920 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 30627072 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 31511808 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 31517952 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 31519104 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 31795584 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 31797504 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 31963392 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 31964544 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 31965696 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 32408064 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 32409216 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 33293952 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 33300096 |
|
} |
|
], |
|
"md5sum": "8c27c65a609e64ab75b3bbf527046f95" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32968704, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 276480 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 278400 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 444288 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 445440 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 446592 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 888960 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 890112 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 1774848 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 1780992 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 1782144 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 2058624 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 2060544 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 2226432 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 2227584 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 2228736 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 2671104 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 2672256 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 3556992 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 3563136 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 3564288 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 3840768 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 3842688 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 4008576 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 4009728 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 4010880 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 4453248 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 4454400 |
|
}, |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 5339136 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 5345280 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 5346432 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 5622912 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 5624832 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 5790720 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 5791872 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 5793024 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 6235392 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 6236544 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 7121280 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 7127424 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 7128576 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 7405056 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 7406976 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 7572864 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 7574016 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 7575168 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 8017536 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 8018688 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 8903424 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 8909568 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 8910720 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 9187200 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 9189120 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 9355008 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 9356160 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 9357312 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 9799680 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 9800832 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 10685568 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 10691712 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 10692864 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 10969344 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 10971264 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 11137152 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 11138304 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 11139456 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 11581824 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 11582976 |
|
}, |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 12467712 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 12473856 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 12475008 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 12751488 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 12753408 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 12919296 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 12920448 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 12921600 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 13363968 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 13365120 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 14249856 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 14256000 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 14257152 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 14533632 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 14535552 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 14701440 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 14702592 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 14703744 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 15146112 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 15147264 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 16032000 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 16038144 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 16039296 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 16315776 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 16317696 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 16483584 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 16484736 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 16485888 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 16928256 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 16929408 |
|
}, |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 17814144 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 17820288 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 17821440 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 18097920 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 18099840 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 18265728 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 18266880 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 18268032 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 18710400 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 18711552 |
|
}, |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 19596288 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 19602432 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 19603584 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 19880064 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 19881984 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 20047872 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 20049024 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 20050176 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 20492544 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 20493696 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 21378432 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 21384576 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 21385728 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 21662208 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 21664128 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 21830016 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 21831168 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 21832320 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 22274688 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 22275840 |
|
}, |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 23160576 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 23166720 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 23167872 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 23444352 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 23446272 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 23612160 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 23613312 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 23614464 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 24056832 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 24057984 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 24942720 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 24948864 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 24950016 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 25226496 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 25228416 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 25394304 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 25395456 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 25396608 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 25838976 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 25840128 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 26724864 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 26731008 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 26732160 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 27008640 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 27010560 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 27176448 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 27177600 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 27178752 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 27621120 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 27622272 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 28507008 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 28513152 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 28514304 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 28790784 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 28792704 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 28958592 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 28959744 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 28960896 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 29403264 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 29404416 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 30289152 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 30295296 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 30296448 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 30572928 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 30574848 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 30740736 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 30741888 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 30743040 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 31185408 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 31186560 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 32071296 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 32077440 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 32078592 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 32355072 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 32356992 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 32522880 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 32524032 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 32525184 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 32967552 |
|
} |
|
], |
|
"md5sum": "754c516b6e9a414726e16798fa94ad36" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 3120768, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 884736 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 890880 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 892032 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 1168512 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 1170432 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 1336320 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 1337472 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
1536, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 442368, |
|
"byteOffset": 1338624 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 1780992 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
576, |
|
1536 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 884736, |
|
"byteOffset": 1782144 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
1, |
|
3072 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 6144, |
|
"byteOffset": 2666880 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 2673024 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
576, |
|
480 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 276480, |
|
"byteOffset": 2674176 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
1, |
|
960 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1920, |
|
"byteOffset": 2950656 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
576, |
|
288 |
|
], |
|
"dtype": "int8", |
|
"format": "f32-to-bf16", |
|
"nbytes": 165888, |
|
"byteOffset": 2952576 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
1, |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 3118464 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
576 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1152, |
|
"byteOffset": 3119616 |
|
} |
|
], |
|
"md5sum": "1641080b39b88ab1a28525e1a66ddedf" |
|
} |
|
] |
|
} |