dusty-nv's picture
Upload folder using huggingface_hub
567911b verified
{
"metadata": {
"ParamSize": 303,
"ParamBytes": 69390720.0,
"BitsPerParam": 4.12686857960117
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 33301248,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
49152,
72
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 14155776,
"byteOffset": 0
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
49152,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1769472,
"byteOffset": 14155776
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 15925248
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 15926400
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 16368768
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 16369920
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17254656
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 17260800
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 17261952
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 17538432
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 17540352
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 17706240
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 17707392
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 17708544
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 18150912
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 18152064
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19036800
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 19042944
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 19044096
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 19320576
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 19322496
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 19488384
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 19489536
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 19490688
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 19933056
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 19934208
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 20818944
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 20825088
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 20826240
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 21102720
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 21104640
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 21270528
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 21271680
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 21272832
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 21715200
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 21716352
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 22601088
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 22607232
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 22608384
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 22884864
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 22886784
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 23052672
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 23053824
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 23054976
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 23497344
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 23498496
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 24383232
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 24389376
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 24390528
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 24667008
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 24668928
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 24834816
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 24835968
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 24837120
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 25279488
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 25280640
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 26165376
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 26171520
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 26172672
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 26449152
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 26451072
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 26616960
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 26618112
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 26619264
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 27061632
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 27062784
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 27947520
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 27953664
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 27954816
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 28231296
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 28233216
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 28399104
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 28400256
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 28401408
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 28843776
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 28844928
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 29729664
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 29735808
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 29736960
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 30013440
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 30015360
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 30181248
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 30182400
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 30183552
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 30625920
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 30627072
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31511808
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 31517952
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 31519104
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 31795584
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 31797504
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 31963392
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 31964544
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 31965696
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 32408064
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 32409216
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33293952
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 33300096
}
],
"md5sum": "8c27c65a609e64ab75b3bbf527046f95"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 32968704,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 276480
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 278400
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 444288
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 445440
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 446592
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 888960
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 890112
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 1774848
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 1780992
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 1782144
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 2058624
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 2060544
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 2226432
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 2227584
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 2228736
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 2671104
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 2672256
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3556992
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 3563136
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 3564288
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 3840768
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 3842688
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 4008576
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 4009728
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 4010880
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 4453248
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 4454400
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 5339136
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 5345280
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 5346432
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 5622912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 5624832
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 5790720
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 5791872
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 5793024
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 6235392
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 6236544
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 7121280
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 7127424
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 7128576
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 7405056
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 7406976
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 7572864
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 7574016
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 7575168
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 8017536
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 8018688
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 8903424
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 8909568
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 8910720
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 9187200
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 9189120
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 9355008
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 9356160
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 9357312
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 9799680
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 9800832
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 10685568
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 10691712
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 10692864
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 10969344
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 10971264
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 11137152
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 11138304
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 11139456
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 11581824
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 11582976
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 12467712
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 12473856
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 12475008
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 12751488
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 12753408
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 12919296
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 12920448
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 12921600
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 13363968
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 13365120
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14249856
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 14256000
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 14257152
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 14533632
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 14535552
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 14701440
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 14702592
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 14703744
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 15146112
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 15147264
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 16032000
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 16038144
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 16039296
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 16315776
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 16317696
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 16483584
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 16484736
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 16485888
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 16928256
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 16929408
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17814144
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 17820288
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 17821440
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 18097920
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 18099840
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 18265728
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 18266880
},
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 18268032
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 18710400
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 18711552
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19596288
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 19602432
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 19603584
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 19880064
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 19881984
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 20047872
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 20049024
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 20050176
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 20492544
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 20493696
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21378432
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 21384576
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 21385728
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 21662208
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 21664128
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 21830016
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 21831168
},
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 21832320
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 22274688
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 22275840
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23160576
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 23166720
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 23167872
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 23444352
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 23446272
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 23612160
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 23613312
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 23614464
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 24056832
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 24057984
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 24942720
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 24948864
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 24950016
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 25226496
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 25228416
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 25394304
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 25395456
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 25396608
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 25838976
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 25840128
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 26724864
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 26731008
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 26732160
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 27008640
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 27010560
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 27176448
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 27177600
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 27178752
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 27621120
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 27622272
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28507008
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 28513152
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 28514304
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 28790784
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 28792704
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 28958592
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 28959744
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 28960896
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 29403264
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 29404416
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 30289152
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 30295296
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 30296448
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 30572928
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 30574848
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 30740736
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 30741888
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 30743040
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 31185408
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 31186560
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32071296
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 32077440
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 32078592
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 32355072
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 32356992
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 32522880
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 32524032
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 32525184
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 32967552
}
],
"md5sum": "754c516b6e9a414726e16798fa94ad36"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 3120768,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 884736
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 890880
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 892032
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 1168512
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 1170432
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 1336320
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 1337472
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1536,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 442368,
"byteOffset": 1338624
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 1780992
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
576,
1536
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 884736,
"byteOffset": 1782144
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
1,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 2666880
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 2673024
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
576,
480
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 276480,
"byteOffset": 2674176
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
1,
960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1920,
"byteOffset": 2950656
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
576,
288
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 165888,
"byteOffset": 2952576
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1,
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 3118464
},
{
"name": "model.norm.weight",
"shape": [
576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1152,
"byteOffset": 3119616
}
],
"md5sum": "1641080b39b88ab1a28525e1a66ddedf"
}
]
}