{ "metadata": { "ParamSize": 147, "ParamBytes": 5381726208.0, "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 131080192, "records": [ { "name": "lm_head.weight", "shape": [ 32002, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131080192, "byteOffset": 0 } ], "md5sum": "aa62c06a70fb5a950fdf95e841e5665e" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "d40b8a049ad29101b2018a3204a0a5df" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 131080192, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 32002, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 131080192, "byteOffset": 0 } ], "md5sum": "59030e677e50b90799cee84b8bd17aac" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2223086381b6c11f8e3b4097666b4b43" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "976092320e540691298bd23e6a19d51b" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a17af969921ff417997e1a9f171757c9" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7de3188049d77eeaaf8c1ec1679f9770" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5cb8cbcbe69a20172e8fd32496409c17" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ac079473cd12fe8ecdc5a247970d8bae" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 30961664, "records": [ { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 4096 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 22548480 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 22552576 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 22556672 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 22560768 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 22564864 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30953472 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30957568 } ], "md5sum": "b7c06e3d5c13f15d67af393f4bd320eb" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "020cfbd62dd493e2fcdc29a306197c2f" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e6c18a0e1cfe78a5131fdd0cf308a203" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "30bba0b8f4ab7339075345d1990cd769" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "3b599d950e01d1f80832b78247ad8ad1" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9cd2dd2d4e7a9875eda0d7b04a20f2e3" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "92f9cccc48402e8d85f2e40fdf55771e" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "869032eda4613844d213826e2ef09a73" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e647431e88003ea8f92cfb7f2b2eb2bf" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "2e9100b65f4aa7887be8478ac3828917" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "00772199120bcd9bf6324d733231a637" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6db279220fb374624125b82d4103076d" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "df379c7431751c2b5a2e7194f36e8fbe" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "594fb919a0d890957ae6ba4a948b01df" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "036ec847d1f9764e72a3d6ebc0e1580f" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "c98e85c605c2d3651c04deca07e73934" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "3b7c1a1324cd44f27e195bb6b78f2313" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0268230024d8996567bc343ed98e8b4e" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "60e3c90f656c5b5cbe683060c9ad945a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "da9021fa65ac793c638fc93a947027ac" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "67e6efd43226e0071f7335c472594797" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "47eb6c3d1c47d070915ee70c2c0d36f5" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8589520d8e3da7848294eb1bf482a12c" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "05715969af9d065c912b13a293dd0577" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "12c481680eae04667b8fbc4c2c586b30" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "75753fe64fb7abae4d9d3fbe2af37b6e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ae72d717b7431b4173ae1d989c82eb45" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "b419d5a34c6fa576db3556ae6677b0f7" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "484f36a954d7d7998da147ef2bfdfb70" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e333e7f479e36bfa443df35facd8dbde" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "1e986705776b44442133ca82a95a8646" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "addc1e5e6e3c1e2e99063e7c32b53b4a" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7bc54a4631d5eadb26047db5f3654c47" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "009074e6399de6d54ae5cd99d2fe7c12" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "28127f319a79e9810066b46a10f30694" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "382d8c9e7b0702dda876278dff397f9f" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "1760d8b2b01fa1a7a99501b0c7fad2a1" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "565f592c20535aa97baf8cae3d3b0d31" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bd64c37cc13f3a98c0270aaa6e18a4b6" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "5d5e7f9b1aefb1395e4ec278b91c6576" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "978cce3424af315de3885009c29d8088" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c3feafb46731485c7e5ce037cb2893fc" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "f2c59368b948b4f273d2d23aedf2d7fe" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d488002dcfbc1ba9989d2e86f65b52ae" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "134467d95320431edff2995abab692d4" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b82eeb25e2913e45c75b8aff2189daaa" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b230ec756056b4c66f05a6a844c31314" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "12d179582a775daaef2c77e861551a4f" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "82471398e92c296cdc2528c655a57c5e" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "824a54620c8b55aa556c19f7ae315432" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 8388608 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16777216 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 16781312 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 16785408 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25174016 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 25178112 } ], "md5sum": "b40e122daf222280fdaa257d54693086" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fc9dea4d6bb85e28b4e320b2f7780585" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "42109a68e412963f39be209afd3479b9" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "0cceb5c8ec2056a42138fc54cc727996" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "aadfc198b67b055e42b46efdfb9dedc7" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b0eef32bb9e23574c8c4b72cfccf40f0" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "0651a94bbc62480da5371fe0135bf0c9" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "3087ec531cc0cc6ca577838d0d5df17d" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b0b4280f416929d3e4613d7bc09cc769" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "654d609b9b57316529730fb7f4c58058" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fe4c1bc80331a13add80503e3f1f1111" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5db17977fc55b17628f13dae5276f965" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "b8ad47013861a2dc1db3f1c8ec7cf4f8" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 11008, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ef8c893026cd01061d89702ea0723343" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 6144, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e474f1b1ee8ec09d6ab4113a0ce733f7" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 30941184, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 8388608 }, { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 5504 ], "dtype": "bfloat16", "format": "raw", "nbytes": 22544384, "byteOffset": 8392704 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4096, "byteOffset": 30937088 } ], "md5sum": "168c3e8c611de7da4d3297d67b37195a" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 8388608, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8388608, "byteOffset": 0 } ], "md5sum": "477b07f89a7376837d76bcb3e288a0c0" } ] }