{ "metadata": { "ParamSize": 254, "ParamBytes": 6171877376.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 622329856, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 151936, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 622329856, "byteOffset": 0 } ], "md5sum": "f58c0acb41b85d8e7d5f5c53d9501526" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "0bfaa9620d061038eb70bf4c0188c447" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "45649b9a88a54d3f210ec7c6b762aee8" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "1fb6637c50eec204f716a6e2b7004948" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "826a05e4a27388aafc5e4a489f6ab271" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29386752, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 4096 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8192 }, { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 13312 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10499072 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18887680 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18891776 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18895872 }, { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18900992 } ], "md5sum": "4965e678f27fd5965a9dc4b529721a61" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9dfa1476617ee80bce853a1d126ef23a" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "e497cf0aad47d8ef2cbae595c67ba1e3" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "73bff5ef420b7df2698ca90bf2d4df51" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "b67ddd9c92d023512982fa3830353421" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "1688f84e4f7922f76817b6bb23ac1384" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "de14a5223492bc23ce31d025d7344400" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "b6672bf8dba2cac2b56ff2a25b32e42a" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "9eae6a11b421f3ce61171309b12aed24" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c0a03bfde4dbb8e009498dfecb3e8265" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "002e4817886977ee630d7042d6d5a09b" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ace1c86724325188be11dff0efc4ee5c" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "dd6107ab54b454867c2be25833ceddf5" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "29896a6487425c50ce734d63c64a0045" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b9813a99205422ef528dd478f0e0426e" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "1a9ec90c169d1163127c0060ade7e033" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "b815ce34bc6d755e86aaa243d10e785f" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5bfced7df7530f5b0e2b5dd35d9e483e" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "a4c0ff835bf6654a4adde03177309a07" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c5eec349a32db23bd55b8ec57df3ab41" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "88771c127d151a4f3980537e800dc1a7" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "a61edfef11446ca99e769e1df08e1267" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ac2fdf4213976668e5aa80b1d262b200" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "73d57042875e2e07e4b0f609e017c735" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "3abb7cf0a44a0b73e5137b61d1dabddd" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c0f9b1d551c5f23d140dbdb87314eb3c" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "04f9bd1796ef023f16c8f879227f18d8" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7bc4817941104c75718246d974ed5193" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "8e9a521dbceb7b63b33a46b7c5280dae" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "db8679b3c3a963a2ecd1d8291d4a0cab" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5f68575c10b75b3a9c0e4f1170f561f6" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "74c5a55fb43017a54caf745862f3a5ff" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "67cbfe7a7e0885134b290b5c53a399b9" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "214347f2fb8081a59b2c862cc9219e30" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e4a4529f892e439e12c31bfb13046593" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "1a1ca8b9926654a6a926464cd0a8c964" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "b3606dd298a4e5bb260e7d70987b6694" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fcdec8529d65079960749f437f2a4f79" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "b85856695ac2cc6b23b43984d4246946" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "00ae1c8d4f65260416161a405e1481ee" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "0ddf1424188e8231e7ba3d3136bb35d4" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "098629b017a248912fa17e9032ccbe42" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2fa47fd530a7896a5d3f164798004f5d" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "02c980ee910a4030b28037c0ffbc0242" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "9d325e8a401014b4354d693f03b067f1" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "d7d9998f5372b2a62e6700e14dba76ab" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "d5f943fef4c70ad775575f0b9272269a" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "821e893e7e7007d1b2c54ad1a1e5e409" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b5c48f3ece0b879d6a2552db05b19cfc" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "9b819fb21b6a3b345e1c27f7e6d71e9a" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5f909663574916741ceed1c39d858df6" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "779156a50a2d480c5b03179af05548e6" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "4f1d844111ca65fdb37ee3a1a7e84765" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "16fb473b08fb56dd88d676eedb48193b" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8c0f189b6ff437d8519774cec2d07753" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "88e2891c863f2fe0c52f2d56ba4f795e" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "2a7200dee16602ffda10b634e5f5b6e7" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "306407ec30a7cf3667b48023b469eb8c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "3e9c86504bb7f37a115b73b2a89ad9ef" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "6aa6c27ceec822684eb2947e6ab6c844" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "776b5452c9e1347a040b3197973a5bd7" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "b8cded6bb57028d8caee5d16685eda56" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8e00da96cd0492374f4c113556feadd8" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "5ff80e592ca02bcc840f2c45ffb377a3" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "6000b1252c865c1244a2bae136d7ef20" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f8f1f022388cc0617f8d740c0fb7dafc" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "4b4e9ff85306691dec1ebc118da45aab" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8567f854abc6dec3f694600ef5de6cb1" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "306fd8e1d1fb539cb4b0243206b70cf9" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "96cd5edca290c894a0b8556be3db83fa" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "6d7eec6feefa599db05df37bdafd0942" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "9c5eafa3521197bbe7e228bd348cb615" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.28.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "e686501bcee88364812b9aa417379861" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "25a1442240f3548f9ed920138d6f2302" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "649c542a832e9f48252b1e9837f813fd" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "d331c2101d88423c9f1666a2c683c22b" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "e4975ad33de1bf0d90a1473dc4bf930a" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.29.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "6bc588c7e64f3cc48aa39089ab27d9dc" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "854d99689ea537c3f5538c3512908a21" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "729f4f7b87185ef60bce343e60e219d9" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.30.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.31.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "c6eb60e2bb0ae2c5fe0856c13dc51f34" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "59b8945b6e2c7116a9e142c52b4f2a7d" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "4860f118c1b7a21d144ffedf118af89d" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "3eced39182906479bc26b68da5f80148" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "0fe17a873e5d718536793729d0e2712e" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 27289600, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.32.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27280384 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 27284480 } ], "md5sum": "40c0dfa72b018a22d11d30a45e3bb978" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9b73caae98cd7e47ebb08c804f468df2" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "92f1f90019615d78f7f6ef17873e477d" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 29373440, "records": [ { "name": "model.layers.33.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 18882560 }, { "name": "model.layers.34.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18887680 } ], "md5sum": "4806c3a4257b09a7a8dff09505f59605" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ 2048, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "1de221761f59f15b8d24b05258fc6f89" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ 22016, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "8f79a06439059bdb5f769b209f66aa4c" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 27280384, "records": [ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 8392704 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8396800 }, { "name": "model.layers.35.self_attn.c_attn.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 8401920 }, { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18887680 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 27276288 } ], "md5sum": "6e3f4ecf3c3a86b1f33b4642e6de9869" } ] }