{ "metadata": { "ParamSize": 563, "ParamBytes": 145412407296.0, "BitsPerParam": 14.781631589720977 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 2491416576, "records": [ { "name": "lm_head.weight", "shape": [ 152064, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2491416576, "byteOffset": 0 } ], "md5sum": "14d764c6880f37a4807f92ea485f1a0e" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.79.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "441b344007cfae1cc7a32ea0a79dc419" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "7b7a89e70c024ea2e26f64a5785d1cfa" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 2491416576, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 152064, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2491416576, "byteOffset": 0 } ], "md5sum": "aefbe69dc0f57bd8324e1c5ca3f43073" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "40721ea9c9a79bb0090c2a6b5b66cbbc" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b256902d4296c6c0d56dafa2f4adf635" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "487b5576f42b97ae884b7b746a2d001d" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "f1f1f2889f0b615f9dfa87b771a31be7" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "bf4f78c5a6e9cba9a331dfc566538229" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "db47751125f5aa7ca9e888c96b977db3" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "1a275bfee89f3041a2c1f312d0902d1e" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "7d9033afe8a6ff6238c8deee063d934a" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "d7e87e75e89fad2a209526d8d940ef7e" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "d3e92a3a6578bc1c7ca3778ca4b962a7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "799989f1cbb1c57163c4ff3cee2cd01d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "7193dadbeca208e6f11a06c150f40f97" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "7a5c7593e8a881e3eb33c3c5c660abbb" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "68f75010c93376ce5b1a71144282031a" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "f2d8cd64aa358477b0f5d0792eedc0d8" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c163c49afb03f314e7794bdc14893ac3" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "a32841d5d965a60d150bc8a6020ba128" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "0ae3db3dbc1ed68c8d7766d33e981144" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "65fec1ceaf1030773b8b4e4bd35a7974" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "18e907de8da94a592f736363bd20aa10" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "57cd05f0f7c10ffb52a6f11144c07152" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "c3b0f0a6d04e9f761214a5f8240ddb84" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "c5e36035ce58323f5c464ff1200fa90e" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "64bdaccb09a42a0e5309e1040e51411d" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c670620d4eab25da823f4a922d064960" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "cb9c06105bf63e1a635cc143c764bda8" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "9ca545c992c80794b507c809a6331a78" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "a8184ce1581a6bb02ceee984b670f753" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "db97ec76e22579356c650ed09cc5cb3e" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e4201b82e1a12a26909f7fab4be8e8fa" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "e61acc3174ea23178160dc8d6ee567a0" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "eb5ce4cac98ac90ca3375bdc900328f4" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "5d478b689190a83fa95f2484a6a59023" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "8b4f894e6576e8aec4ccf240a6c01eec" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "fd72205d816d25f9214de923861dde09" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "8bd218519edcfa6abc16150969dd5cfd" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "ff93068515d6e3772bf35fb9d12d0ce1" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "1a83d58d28c49ea4a54160eed491d904" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "9e528024ebf20fb96ff09f0d8e9f79aa" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "132e5aebabfa6cbfce207f2170024aab" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "9e4279bf0cc6b180be6b3a50eaf0e5b4" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "4d47ce0359e63e46c8ca2b800ddd2c2c" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "025372a3fc64084224e78b8971023db1" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0169b4981c09f7a1c5876ee9f7cfc16d" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "194cc23b83b3d2004a47df8b86079021" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "cba6b11968ceec995a9700300dde2dd9" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b62ef0bd01a348702c8068fb56a30c66" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b92bdefa87f00b17756bb705a45c08fd" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "3bb82c743e4006de80a61134b412b67e" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "b44056b808af9a25a1e5a0552e4da49b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "89c4ddf8765e63a81f3dfc8b0378a19a" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "ab38f7131fcba7c71a1113dd2929c569" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "3b562c82bee44ad16decd208ac54ea06" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "3a2bd6b879ea26f4dd8806287dfe287d" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "079f7c8a85f1598a1e8c4c07e88bcb2f" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "a1afc8cf6ff6b6527d5b53ca6f867e9c" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "15c280051676aa26b6db21d524e1e7b1" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "ea2fca425eef7a596504554f6a4334aa" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "bc695080349fafcc716831da6712fd8c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "1349255ba0c0b984958ee6d6dbade040" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9e4262c74e2bf2b83a4bd1e468ed98fd" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "98bb1efa08bd9a94c3a85f7409b1a186" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "75498c2a02c48a78d95b254e9ca2fd71" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "cd055c96c6b4693834dbaec5eab1bfdf" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "652ac8767019ca77c3e546c163fe166e" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "f3adb3762d53dc38dc37a21c6ceb7252" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "53aed4d0d7005b6d0215f12905c32c56" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "bfa9bb343c1eedf20ff96d2b686fe68e" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "921f525b50c896efff58ca8012685243" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "fc481bd6da15a89c0411c8c032567516" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "6ec050ee239fd2731fb1646fe5beff05" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "8063c2b53c18d8ffffa7f3c5d8601905" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "0dd95fdb5599781023809e564f3ba484" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "a5cfb860d13b151899700469a034e96f" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b040aaddcb8aadbc51c39316c4d488b1" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "99372d83a936cd2c7bc92e7f621cb870" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "f454fc23f1ddd6d1523082fa0dd3481b" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "b36d03367395d7804f1384a714ae1a54" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "25a6ea55f094c2b79deda77f0bd3c593" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "8f6fcfca11fef010432be08bbf4cec21" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f8db7877f937927dccb41ec63268347e" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "a2852e725eadf7d149f91db349f2f8c4" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "c05d9c3b753347fac5c0d429ef8edb34" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "e5111f2d04cd28ca1e4af1715d27bb14" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "d12f5f89621a2a8887d6fe942714d8ac" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "96b6fa68a0a4f66b1459564aa3bf9107" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "a1dfd460cd65ab606b27fcfbbe0f1634" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.28.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "ca352a9951fdce8c5980609db3f96748" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "02ee30d1164ed2af1dd489a7838abfbc" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "b33213fcffa050a78c7acf4d6a46616d" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "acd58b41a335f547d77f05b4c3f5b757" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.29.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b5bea61014f64c65c61b9af230a8eb7c" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "0b119b2da004d97e10fbf0910ed10667" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "1f8ce9c48ece8b6d2cd859adaf88e966" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "008beb17c4c3537796064397bc6f1659" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "6350ed5778c75d61394a757dc91d89b7" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c109678fa93f166130778f3d6a9af0ad" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "5f8e8ddc69af67fde2da19255e235372" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "045fbedec95d362437c86f5f3b485972" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "509b29143ccc80e43b7035403489fad0" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "c30eee056a617f00564add6a6a1eac34" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "142daa467f0c8a1e742da827a42e3586" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f5b73c43d5f0ac4e9c58d4dea18bb3bc" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "3e74e542ba76bb0e9e6e9d44ae1de074" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "7f614ee9b93e54ddebab81490cb8d037" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.30.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e4b2c15c8e24ad4d489922e8377cdaa7" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "8a510bd5c2ec37b3cadb8bd7d9e3b9aa" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "6409d1380d8a5ffcdf1e693d60917b4d" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "e1b84c5c42af4bb9e799f932a2707027" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.31.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "cebc1c6ab31ed62dcf2c4eab343ee800" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "5bdc51f19454d40b937eb6c2a1dc2a40" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.32.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "9e6ed9f2f638f1c99ec6414761786959" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2f8723c896c8d95708c21b2df005143c" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "c57abd3d616bdb18f2c5165bfd45b839" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "eb601188732a9ea8e312a27ba72c299b" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "92c5e3f6d9208b681bf0603105601927" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "785cca45a5b645bd7d64cbfd5a9b50bd" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.33.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "57c92b2557b110fdb04947885018d346" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2020499cf53369c502dfc406384f445c" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "60ea24e60fa04ee175b9958ebb0fe1e0" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.34.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "8c6a540473f148700fdb02799ecbce30" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "72decbd3a8fd416fe2a7a0ef2dc8625d" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "7a4caf8a96fa94a35bc59bf595718bae" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "c54e8c9c04d8a9bb335bfbc7f66214f7" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "1ccc07672e5766ec868f44613a0c21e6" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.35.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "9aa69ea005434b087cd96d9254a5f1da" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "96e0582f58b09ef58573230964d037ba" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "94f71eba8914fb3667bceec5ad8638ed" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.36.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "cd7c2b7606db103593ddb6353bc083f0" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.36.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "474b39ea880fd31e0125743a890db2bd" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.36.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "b6fdb53fd852536d22f0bd76d0e6dbf3" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.37.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "c124c1933e5e9ee8657a02fe855daa21" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "5402ee372bdc7d92273d4bf78c201fe4" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.37.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "22ea62a2fb547263322bfdb67f9b26d7" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.37.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9f4bc08b80c89c6aec1abb8ab80b7531" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.38.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "0c6e23331edb94cc1303b1fb9e1d66b5" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "01be5ed85d204b36c6e050ddf89a4e2f" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.38.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "c1c5184ea85e8e5ee39d2dbc7df7d4e0" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.38.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "486d29a91dbcc66e1ae62de10d8c5edc" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.39.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "6386849e20e1841eed2f7a0246a0cc61" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "157571065a1cce9516c99b9b246afc44" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.39.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "a70ed87d945e277a46a605a7900a232c" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.39.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "28f0232651334302a6743e6f785ae2be" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.40.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "d1d9a281d98ef76aeaa3f538f8f13e64" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "15b0a0615b66fc294ebb824c7410474e" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.40.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "a0e8e3b464a7fac3d3fbbb2a2dd40ab7" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.40.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9e2797019107e5117fa92161d1449f5f" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.41.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "c2708ea86553bc47a457cb2a6d9e2eb5" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.41.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "279c3573fe4bac04ff53a3b12418d4d6" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.41.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "30bd5514d4c950be3fa7080c330bd09a" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "97047d94415c2aedca46f82db0b6ffe3" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.42.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "8a9ccf558348009a94b1855887b41436" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "7b73e0b6791ad05dfcd6cd22c1c63952" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.42.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "c84a0abf708eda36c8d3450e91eb68ea" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.42.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "6ecf266855ad22708713ae8c9f7e6b08" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "c415bf6608d116bf14925e86cc2abbd6" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.43.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "11eb93e1055e7acdc24b8e27ae1e6b4b" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.43.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "de1c354c59452fe09c4319e83914837b" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.43.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "61040cb8aab7ad1b402100a1c3e465fc" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.44.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "977e88dc7c790c13b7483c7c869fc459" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "0b3761ecd918f341a7a686e8a9925d1a" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.44.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "7139026bc4fa834fc4c9ebf81f3c54d0" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.44.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "d150186dc490cdf0c3c24824a45a8ec0" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "e46a1927c3cc6773df178737d4d6a33d" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.45.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "282ee3bc94c03a78895cb703719b0523" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.45.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "83eda8496e49a3675388060df1c5db60" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.45.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "c86ae162ed3e077e5b64c6d41af5719c" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.46.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "b9a3c4463bb23e29ab83450c886a58d4" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "849738915fb1608f121e8d3009f680c9" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.46.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "58b01846b9912eed18cf19d8901dca06" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.46.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "ef34456a5e8077b7daffd5c28ae5fde3" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.47.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "35d1a75564274fd8ae95c585a8f6f1ba" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "e5a6e065d5e0c243dad00c5a0de71785" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.47.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "2d87b238e00cf823bf584da1583b9e5d" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.47.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "4357148993d4bcc6978346cf0beb16a6" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.48.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "040cd0dfb7198f114e88c34a8cc23651" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "9bbd91d4f7f66f5c0feadd0a514b42bd" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.48.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e4169e3196c13ee62337e6ffb87aa6b1" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.48.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "795f973cf306fa4b5d2ac0ec7d395646" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.49.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "e4ce0e42c591195a61b5ab793a1ab1ed" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "91288bc7cebccdd3937af7bed6e32780" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.49.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "d2a2b3f5f3c60b815c3ddf0376121fb3" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.49.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "bb1efaf9ed29d112ac43a8eb30de0dc5" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.50.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0c6296a65f99bc496c0c99f6a78cfa14" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.50.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "630e34878ff9dff0c6c7b48eb7c634be" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "5c84cb57b025bdbadd1347ba6fa786d5" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "4aa8fbbef46ca7947c01c1bb4559e9ba" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "040c8eeaed159adb3181d9a7990b7646" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "73a29504631477ee0b8d6a95e8640f2c" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "75674601b2f7c75e30d77460bb57a8dd" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "558e85480c7405d67591303e26c58cf9" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "0f55225640be2a94bba67f16288fb184" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "ca3be6480cfef23ffacdce9faca7bb2b" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "a46106737e7c80aae932309241a976bf" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.50.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "fdd0d7cef5b653ca1fb0bc3982d22faa" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "012b0c72ce7d4030ed47ca7b85abcb51" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.51.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "a5beee559eb15e02599fca77be94ccb3" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "bafd668c35ef41c22d2903bd75b9fe64" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.51.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "12597232482b67c1381d24d9c2d4fe84" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.51.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "78d498632b89826f9c6f7d9e583eb3f4" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "e72bc6fd763bad0ad7b1b7c2d3964147" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.52.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "5dd6ba10116117890be0b220c5825138" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.52.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "61939eea2febd742a18842e4c27688ea" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.52.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "2764fe72be7e02ca952c3a6f79568134" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.53.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "629368a52d33f4331e69d046f31ac568" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "40637132f17d5b60d3d6b95f5ff4f97b" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.53.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e7ebded1b80a50d74023cf6d8e262b4e" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.53.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "e098f2cb89bfa07e015ba6ba63ea3e97" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "bea0a1ba7d49dec859b9e36860c07e84" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.54.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "3a7d3eb6990b2142dc61187fd9f96ab0" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.54.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "96ff2a8b3e550dda18fb82b422e6aa21" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.54.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "51d0140800e71be2d35ff6589028016c" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.55.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "23c4278bfe70e801996e42b589c7479e" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "a7985ff4dabe165f35d20a49b9ffb1a4" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.55.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "8370e0e95e5051501c8145300f2cc045" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.55.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "40f3a44befeb28623daf696e05ef849b" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.56.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "3f5b2d138c86ca7ba5812573c547859f" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "1093ad29b0d016a23f0dca073492d906" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.56.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "a0553d95161571af7a4fa858d07e86de" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.56.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "69eda2d395025b29395e9146d5910bad" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.57.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "f422b63d213b7b3917afc3009d0ceb3e" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "45640c943876aa0a62c90c92b5e3611d" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.57.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "8029f94384c2cc108a8120c753339535" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.57.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "13df791227553e0f4c7e4a09dd591170" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.58.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "ce8700312286e1db2758aba0f069ff87" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "d9b31e383af659953619baa6f623920e" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.58.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "ad1e9605909da056842007624baf60f4" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.58.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "aaf09649cabe44126b674c13dfdb81a2" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.59.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0f577151ef008177df8defbb898d51e9" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.59.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b916888e1eb4aaf8fc81fb8ff6ae8842" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.59.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "35c9e0e270299f02dd539147c00a1213" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "e124f0a901b7ea882df2950c8f3c09eb" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.60.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "47e2cbc05ad6ef462627cdb9cf415441" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "041ffc519cbe5e212a53237cfb94e27b" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.60.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "5d8e5b0fc546ac52c83fc8c479dcfbba" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.60.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "0f6a1c0b2dac63d598fe2ae998cfd1a4" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "79356200fb92e1fa5c1d10fdb603cabd" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.61.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "fe886dc2743c38c54d9c171c68aae813" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.61.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9d133b0f8c5fb6e2d3d609a6b087a330" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.61.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "67032fd0d1b0bcf51fb57947c837f672" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.62.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "ac5ed35423565f631323cc2ad54cf9f0" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "2befb410c0ebaff86885809150e756f5" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.62.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "92c6be51dbe25dbccc72d9f1a2d17cb3" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.62.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "e5eed548e9a1ab136f2895179b5fc4dd" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "c3979bc159198f9d88267153b19be3ad" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.63.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "8bfd01cb30e0cda175751f3636475816" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.63.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "67794bf1887ec0c421d6ab2ddcee07c1" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.63.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "8b60f4aac7d2fb9be0c0187b4503629e" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.64.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "66749ba879e1d4b6c773f1863fd6981b" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "31720a5203beef093c5f809c0f75cce7" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.64.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "ad32487b33ee37791c5b57aa551173ae" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.64.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "00d66f0ca8f64c8a134e79a9bf437579" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.65.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "d32cc3516fc7b5722fdf97ff09101f62" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "436d41a7b46dd50ac8852f9e2c810495" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.65.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "cfa538217114031e9e0bbfa55118794c" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.65.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "0f748fa82303296fb44fd701b8e13bfa" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.66.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "cc3de7f22c7c67d1aff58288b6466bd1" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "d6adb1e0267946052a0419c296afd16e" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.66.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "5ec377eb8d9ef269a3760a0e9554390d" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.66.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "fda50be1b13c28bbafade02832f0c39b" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.67.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "a233d6ed653cbd8e6f1f006c0b9d2ca6" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "3c3f2a11d829bcb0cfc1b404254f6d41" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.67.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "11a5b211b7783d43f1da68651025e865" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.67.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "175b81fe6e5bf2b3a4028679f20983fc" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.68.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "5897015191d6b8a25030008f37f8a5ae" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.68.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2211b4a219ffbac8736eea25a1feb9b4" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.68.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "145052cc917add033ea0326c7a6573cb" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "35da03e83e25862bebe87def06dae5ca" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.69.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "c122ea194fb79fad0b081548a0ca2748" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "8e37c567a8f4ad62570aa9639a26185a" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.69.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "4854f115fbf4a41fdfae599f836413fc" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.69.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "2cbf61213347aad46aa29d9bec1d7ad8" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "4c545740f6d988b91e221fae3d4fca30" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.70.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "678df97bda283b8980d849d4b720c14d" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.70.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "45e5a7b2a7a9d50c858adb3ab778adec" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "d0968f671d3989bf89dc76724d512b7c" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "420800e88f07272533604317cf8ede24" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "e6b21c1881387e0d5e360dd59c07c2ac" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0ebe7e26bfef6ada42dea79aef4547e9" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "56c76258d8b84d77cf91c9591e4b4e65" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "504e229f44f2f6153735f5ea8d2cc882" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "f58c8a3f112678c000139e730c15067c" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "993df2f5fc72ffeccd3ea3e5ef9b71dc" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.70.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "effb6fccddfcb6e87d601a1024de0167" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.71.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "1ffea0abd085e3442fb39e16d0ae3f61" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "d8c37ba1020bbbdc07392e57ed3c0dcb" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.71.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e5aca5d62a9b0b950d547d412aed573b" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.71.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "357a4b2a0d21fe05b4b77f5f3b7fe749" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "a3d9b956a240e29e5c2b86911c403cd7" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.72.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "faa4f6e0604bb55cc7970b4969bd68bc" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.72.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "8d558fb0ed587151b01da686cb9d9609" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.72.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "2bf2f09107316a62934b350247b226eb" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.73.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "2894e75e0b4c2176c2f12b825199b4e3" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "7bc9d667345f8466c94d6b0c4dacad0e" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.73.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "a798b5ec49d5450fd59de46c4865e80e" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.73.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9ed0a40534bfe95c7df6f51743235011" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.74.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "71cfa989d52e360ad9eb9f542a581d12" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "7bd52bdd0be3782d3bc9f61c13aa8af5" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.74.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "5718e80aecf28aff0dccee32fd524c89" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.74.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "74259eb64475640c76a225325f709ac2" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.75.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "47f597260cb8cf87938609d29e50f33a" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "d4052ce4027b6cc855da1cf26194822a" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.75.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "8a174e0ddc50be397cf2dd7920274377" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.75.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "bab1e89a756821531516823465a8e000" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.76.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "f025bbcfd419c136691e6c48dbbee363" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "6f3f5a5e78d636d7c0041ad66dc74323" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.76.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "b32bff96c87bc06494faade95c84ca46" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.76.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "b65fe0476e21f058832bc705e3da83c8" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.77.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "0809a8ad01771e7dbb4da3f1aca5f962" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.77.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "f858ac75e76a1ccf7886cf637e67e821" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.77.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "e4d493e762ce1797daa3eccef5e8da26" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "8d48cfb5e9e25ae7f3e8fd92c5ffa954" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 484442112, "records": [ { "name": "model.layers.78.mlp.down_proj.weight", "shape": [ 8192, 29568 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 484442112, "byteOffset": 0 } ], "md5sum": "ce474355f278fecb4dace8910ccfe600" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 968884224, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.weight", "shape": [ 59136, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 968884224, "byteOffset": 0 } ], "md5sum": "7eb2d8ce567f538341d00f3dcae34000" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.78.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "d15e319b93808e2212cdddbf58ebdeff" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.78.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "96a450e8aba8aab584d5af51ecc54a71" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 167772160, "records": [ { "name": "model.layers.79.self_attn.c_attn.weight", "shape": [ 10240, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 167772160, "byteOffset": 0 } ], "md5sum": "e3e74b670f9e4b2ba8cb4a67d91630f3" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 134217728, "records": [ { "name": "model.layers.79.self_attn.o_proj.weight", "shape": [ 8192, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 134217728, "byteOffset": 0 } ], "md5sum": "9befbb1fd452790768d92b7c305caec7" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 4276224, "records": [ { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16384 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32768 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 49152 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 69632 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 86016 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 102400 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 122880 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 139264 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 155648 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 176128 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 192512 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 208896 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 225280 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 245760 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 262144 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 278528 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 299008 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 315392 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 331776 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 352256 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 368640 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 385024 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 405504 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 421888 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 438272 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 458752 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 475136 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 491520 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 512000 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 528384 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 544768 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 565248 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 581632 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 598016 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 618496 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 634880 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 651264 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 671744 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 688128 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 704512 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 724992 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 741376 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 757760 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 778240 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 794624 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 811008 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 831488 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 847872 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 864256 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 884736 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 901120 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 917504 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 937984 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 954368 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 970752 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 991232 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1007616 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1024000 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1044480 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1060864 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1077248 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1097728 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1114112 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1130496 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1150976 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1167360 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1183744 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1204224 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1220608 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1236992 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1257472 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1273856 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1290240 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1310720 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1327104 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1343488 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1363968 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1380352 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1396736 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1417216 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1433600 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1454080 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1470464 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1486848 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1507328 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1523712 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1540096 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1560576 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1576960 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1593344 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1613824 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1630208 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1646592 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1667072 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1683456 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1699840 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1720320 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1736704 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1753088 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1773568 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1789952 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1806336 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1826816 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1843200 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1859584 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1880064 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1896448 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1912832 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1933312 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1949696 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 1966080 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1986560 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2002944 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2019328 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2039808 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2056192 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2072576 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2093056 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2109440 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2125824 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2146304 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2162688 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2179072 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2199552 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2215936 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2232320 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2252800 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2269184 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2285568 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2306048 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2322432 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2338816 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2392064 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2412544 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2428928 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2445312 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2465792 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2482176 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2498560 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2519040 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2535424 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2551808 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2572288 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2588672 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2605056 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2625536 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2641920 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2658304 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2678784 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2695168 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2711552 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2732032 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2748416 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2764800 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2785280 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2801664 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2818048 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2838528 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2854912 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2871296 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2891776 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2908160 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2924544 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2945024 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2961408 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 2977792 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2998272 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3014656 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3031040 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3051520 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3067904 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3084288 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3104768 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3121152 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3137536 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3158016 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3174400 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3190784 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3211264 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3227648 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3244032 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3264512 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3280896 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3297280 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3317760 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3334144 }, { "name": "model.layers.64.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3350528 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3371008 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3387392 }, { "name": "model.layers.65.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3403776 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3424256 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3440640 }, { "name": "model.layers.66.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3457024 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3477504 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3493888 }, { "name": "model.layers.67.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3510272 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3530752 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3547136 }, { "name": "model.layers.68.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3563520 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3584000 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3600384 }, { "name": "model.layers.69.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3616768 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3637248 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3653632 }, { "name": "model.layers.70.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3670016 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3690496 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3706880 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3723264 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3743744 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3760128 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3776512 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3796992 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3813376 }, { "name": "model.layers.71.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3829760 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3850240 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3866624 }, { "name": "model.layers.72.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3883008 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3903488 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3919872 }, { "name": "model.layers.73.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3936256 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3956736 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3973120 }, { "name": "model.layers.74.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 3989504 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4009984 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4026368 }, { "name": "model.layers.75.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4042752 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4063232 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4079616 }, { "name": "model.layers.76.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4096000 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4116480 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4132864 }, { "name": "model.layers.77.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4149248 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4169728 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4186112 }, { "name": "model.layers.78.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4202496 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4222976 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 4239360 }, { "name": "model.layers.79.self_attn.c_attn.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 4255744 } ], "md5sum": "0f7202328665846034cab834f1c75d8b" } ] }