riczhou's picture
Upload folder using huggingface_hub
65edce2 verified
{
"metadata": {
"ParamSize": 198,
"ParamBytes": 3087428608.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 466747392,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
151936,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 466747392,
"byteOffset": 0
}
],
"md5sum": "9c394790c22960ff045f6cecf24f2478"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "2b2470b4a9e72adb478d1631f7405cf1"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 27535360,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 3072
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 27528192
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 27531264
}
],
"md5sum": "dc9e08ac4db31734a1b603ee26599517"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "49563c8f4aa9a7eb793010398bfda240"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "51521a14a5183f53e7597fb1b0c1ac02"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "3c9458be61e6f6d2f09bba659866fdc5"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "e09a65b0e17d3503c7fc7a1ca671a5ba"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "672289b109ad0020f17e9f6844d72eb0"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "1468ccdaed4ba067e4867957c993c049"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 33060864,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6291456
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11010048
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11013120
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11016192
},
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 11020288
},
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17311744
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22030336
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22033408
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 22036480
},
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 22040576
},
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28332032
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33050624
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33053696
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 33056768
}
],
"md5sum": "386225f7f70fddb6a9e43795ad3a3202"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "2f0ee2ee3560e76e2906ba68bed46ccd"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "af10359a4a55734f11c85bd7abd23af3"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "bd931a02a4417507ced3902718fd7d78"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "a886e033b34ef4492524060ada67c5f3"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "fdf7135c7f050510e918a6da2eb747e4"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "19b7ab0aedab2c98ef84d1bb74677cec"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 33060864,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6291456
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11010048
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11013120
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11016192
},
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 11020288
},
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17311744
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22030336
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22033408
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 22036480
},
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 22040576
},
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28332032
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33050624
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33053696
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 33056768
}
],
"md5sum": "40123c949516b96ac14030b58f97c970"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "ba52e42aa6e9bfddb33bc6be2cbbed6c"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "ef3a674229205ddd58f4e2a2df97af1b"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "4f3105a964d9957e5b54bce793846d4d"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "5e1387ab5bc412368449752b6370e568"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "cb2e93a9ceaf3233fbd74948a6c249d9"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "8a80c045d2edddb7ef0573f7c564c519"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 33060864,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6291456
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11010048
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11013120
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11016192
},
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 11020288
},
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17311744
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22030336
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22033408
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 22036480
},
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 22040576
},
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28332032
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33050624
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33053696
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 33056768
}
],
"md5sum": "2d0661bc87c9fd3e7da9a0d13349380a"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "e6d23b39b8cb567cf0a4a7e6714cd78d"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "b4df51c707ad996798fd88cca7d4f82d"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "00a26156008979c2af45a95ec82a3a0d"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "4d45780eb98ca483a06df9ab8adbe7e2"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "75221e62142c91edd2dc091cec30665f"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "0fbd4abacd6750357450eaac4c8cfba8"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33060864,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6291456
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11010048
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11013120
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11016192
},
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 11020288
},
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17311744
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22030336
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22033408
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 22036480
},
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 22040576
},
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28332032
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33050624
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33053696
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 33056768
}
],
"md5sum": "6fb6758ef41b318d5f5bc1c009f643ac"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "dd93ed00fe037887023cc5e040d40186"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "d36d64f0ffa4c128414487032a334a9f"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "8ae17a585657e430228933f94477a4bd"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "b8788042f9ad0c7915afadbe04e31930"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "353c3896a5d202428c9620719a05bca6"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "c0f5cd315d9f707d055fc2505442cb4e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33060864,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6291456
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11010048
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11013120
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11016192
},
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 11020288
},
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17311744
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22030336
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22033408
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 22036480
},
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 22040576
},
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28332032
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33050624
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33053696
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 33056768
}
],
"md5sum": "2a5ef0f2584bde186b674af63e6e7b75"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "e0cc850be792ebbec970e7ff6f4856ef"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "36341c148fa89452c08e764f461d6298"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "21773a84fcb6c6e742babfba3cdb8359"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "e671efec0402a6cee8977fc5c58e507c"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "5bb0260d69401b96e8c6bdebdfca36be"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "59128784d429bdd191282b7d6442addc"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33060864,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6291456
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11010048
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11013120
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11016192
},
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 11020288
},
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17311744
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22030336
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22033408
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 22036480
},
{
"name": "model.layers.24.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 22040576
},
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28332032
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33050624
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33053696
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 33056768
}
],
"md5sum": "2e3ff83df81c41b602349fa77b1f52b4"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "0d198691fa3d0f8a8f048203a35098c6"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "ae27a680d3d40b1fa8619c493bc41372"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "a3da97ef93ff7b3a9f75fe3c61966a07"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "9dbf820a69c2940892a3ba78eaa3ac17"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "923a91f17052a3d0f91efd819f28a9e3"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "b2e04d25a01a6e878053e3929c326e5c"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 33060864,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6291456
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11010048
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11013120
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11016192
},
{
"name": "model.layers.26.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 11020288
},
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17311744
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22030336
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22033408
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 22036480
},
{
"name": "model.layers.27.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 22040576
},
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28332032
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33050624
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33053696
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 33056768
}
],
"md5sum": "e3772e7e14ef6fa64fc1d690dc042bb6"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "889d5ae8be3b2a52a14a806a87d91a51"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "d9e5436000200181c6bb168b3a57895b"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "d40f1fddbc729b64c88e261d6b556b48"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "3c664af5bf4dc2b72824b91dc57e191b"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "5bc317302d4897884b55ec6f42427685"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "add3b34b5d3ff066fc2f581f2a8e6d80"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 33060864,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6291456
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11010048
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11013120
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11016192
},
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 11020288
},
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17311744
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22030336
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22033408
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 22036480
},
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 22040576
},
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28332032
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33050624
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33053696
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 33056768
}
],
"md5sum": "ee80628c2a2a049c273a1e9b0b1b7ddb"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "afd49cfb9ea9a046505f1c2b166d4e4a"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "9390f2ec375eb8c6156ec82543434e6b"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "eaf99bc611ac6669988c0cf21a32eba0"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "c910c61629f83e45715ca3cc84de034e"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 27525120,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
1536,
8960
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 27525120,
"byteOffset": 0
}
],
"md5sum": "8f0903d34e62052c91a180e686ffa258"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 55050240,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
17920,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 55050240,
"byteOffset": 0
}
],
"md5sum": "be500b3df22deade101f844744892dc1"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33060864,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6291456
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11010048
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11013120
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 11016192
},
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 11020288
},
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 17311744
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22030336
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 22033408
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 22036480
},
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 22040576
},
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 28332032
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33050624
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 33053696
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 33056768
}
],
"md5sum": "8df7c8c8dc860c0e0a4670128a173b70"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 11013120,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
2048,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
1536,
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6291456
},
{
"name": "model.norm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11010048
}
],
"md5sum": "db8b6ec54d311d54bf38489a6a162580"
}
]
}