{ "metadata": { "ParamSize": 293, "ParamBytes": 812572672.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 102926336, "records": [ { "name": "lm_head.weight", "shape": [ 50257, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102926336, "byteOffset": 0 } ], "md5sum": "eec32f50280ffcd1a236c7e7dbd83cb6" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 102926336, "records": [ { "name": "transformer.wte.weight", "shape": [ 50257, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102926336, "byteOffset": 0 } ], "md5sum": "eec32f50280ffcd1a236c7e7dbd83cb6" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 27293696, "records": [ { "name": "transformer.wpe.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "transformer.h.0.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2097152 }, { "name": "transformer.h.0.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2099200 }, { "name": "transformer.h.0.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 2101248 }, { "name": "transformer.h.0.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 8392704 }, { "name": "transformer.h.0.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 8398848 }, { "name": "transformer.h.0.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10496000 }, { "name": "transformer.h.0.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10498048 }, { "name": "transformer.h.0.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10500096 }, { "name": "transformer.h.0.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10502144 }, { "name": "transformer.h.0.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18890752 }, { "name": "transformer.h.0.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 18898944 }, { "name": "transformer.h.0.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27287552 }, { "name": "transformer.h.1.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27289600 }, { "name": "transformer.h.1.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27291648 } ], "md5sum": "112a664a1110ab79703ac38f87b1fe8f" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31490048, "records": [ { "name": "transformer.h.1.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 0 }, { "name": "transformer.h.1.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6291456 }, { "name": "transformer.h.1.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 6297600 }, { "name": "transformer.h.1.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8394752 }, { "name": "transformer.h.1.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8396800 }, { "name": "transformer.h.1.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8398848 }, { "name": "transformer.h.1.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8400896 }, { "name": "transformer.h.1.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16789504 }, { "name": "transformer.h.1.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 16797696 }, { "name": "transformer.h.1.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.2.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.2.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 }, { "name": "transformer.h.2.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 25192448 }, { "name": "transformer.h.2.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31483904 } ], "md5sum": "c8425e929b9cf76cb3643623b218aae3" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 27295744, "records": [ { "name": "transformer.h.2.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 0 }, { "name": "transformer.h.2.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2097152 }, { "name": "transformer.h.2.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2099200 }, { "name": "transformer.h.2.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2101248 }, { "name": "transformer.h.2.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 2103296 }, { "name": "transformer.h.2.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10491904 }, { "name": "transformer.h.2.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10500096 }, { "name": "transformer.h.2.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 18888704 }, { "name": "transformer.h.3.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 18890752 }, { "name": "transformer.h.3.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 18892800 }, { "name": "transformer.h.3.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 18894848 }, { "name": "transformer.h.3.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25186304 }, { "name": "transformer.h.3.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 25192448 }, { "name": "transformer.h.3.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27289600 }, { "name": "transformer.h.3.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27291648 }, { "name": "transformer.h.3.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27293696 } ], "md5sum": "116731931ee1771b82ec10524a88cf3c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.3.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.3.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.3.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.3.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.4.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.4.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.4.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.4.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.4.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.4.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.4.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.4.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "fb5c75c0b282fef20b386c538b4c7c3c" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.4.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.4.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.4.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.4.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.5.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.5.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.5.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.5.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.5.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.5.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.5.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.5.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "ea4bcbd38e010438180664d3501567da" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.5.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.5.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.5.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.5.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.6.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.6.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.6.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.6.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.6.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.6.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.6.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.6.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "ef7889578a63dab6c66b4c070159d182" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.6.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.6.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.6.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.6.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.7.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.7.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.7.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.7.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.7.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.7.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.7.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.7.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "7d3b89d1dccc117d0b7ad85fc5edd64c" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.7.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.7.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.7.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.7.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.8.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.8.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.8.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.8.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.8.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.8.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.8.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.8.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "52a567337e48aae8179a77c111464d1d" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.8.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.8.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.8.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.8.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.9.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.9.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.9.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.9.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.9.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.9.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.9.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.9.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "ef47f0009672c223059f783172e0f815" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.9.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.9.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.9.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.9.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.10.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.10.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.10.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.10.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.10.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.10.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.10.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.10.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "fdf1efa1d84efed4dc338323e5693d04" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.10.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.10.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.10.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.10.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.11.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.11.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.11.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.11.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.11.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.11.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.11.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.11.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "7926ce1b2972cc186b5d2c1470acfd37" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.11.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.11.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.11.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.11.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.12.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.12.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.12.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.12.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.12.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.12.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.12.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.12.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "3b05c244b0d40fb843b1de4fffbf15dd" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.12.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.12.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.12.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.12.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.13.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.13.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.13.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.13.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.13.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.13.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.13.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.13.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "bcb2f7f911485a1452aa000e2e2c0e9b" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.13.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.13.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.13.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.13.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.14.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.14.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.14.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.14.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.14.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.14.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.14.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.14.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "4722a67d2e51a955ef61a5d03c3bcf94" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.14.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.14.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.14.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.14.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.15.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.15.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.15.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.15.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.15.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.15.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.15.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.15.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "35ff9c86676c975dbf63b9912ab81f33" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.15.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.15.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.15.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.15.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.16.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.16.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.16.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.16.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.16.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.16.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.16.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.16.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "01df1b9923d63e9d0f9a560804782440" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.16.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.16.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.16.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.16.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.17.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.17.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.17.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.17.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.17.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.17.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.17.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.17.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "92f106c3b756a5a5e926e43fdb5741f1" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.17.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.17.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.17.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.17.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.18.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.18.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.18.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.18.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.18.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.18.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.18.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.18.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "983dbeeab7f0141aa067be28562ea043" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.18.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.18.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.18.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.18.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.19.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.19.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.19.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.19.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.19.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.19.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.19.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.19.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "1057439dfb17bfcc20ba72d11a21434f" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.19.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.19.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.19.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.19.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.20.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.20.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.20.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.20.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.20.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.20.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.20.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.20.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "e45f78377a8fdd0c1f43506acd580d6e" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.20.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.20.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.20.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.20.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.21.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.21.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.21.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.21.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.21.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.21.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.21.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.21.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "ba1df963ea81fc211f28da7fee9a9165" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.21.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.21.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.21.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.21.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.22.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.22.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.22.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.22.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.22.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.22.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.22.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.22.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "e25144d1411e5b0a29c5c8d9c35dd029" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25192448, "records": [ { "name": "transformer.h.22.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.22.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.22.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.22.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.h.23.ln_1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.h.23.ln_1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 }, { "name": "transformer.h.23.attn.c_attn.weight", "shape": [ 3072, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6291456, "byteOffset": 16791552 }, { "name": "transformer.h.23.attn.c_attn.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23083008 }, { "name": "transformer.h.23.attn.c_proj.weight", "shape": [ 1024, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2097152, "byteOffset": 23089152 }, { "name": "transformer.h.23.attn.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25186304 }, { "name": "transformer.h.23.ln_2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25188352 }, { "name": "transformer.h.23.ln_2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25190400 } ], "md5sum": "9c68c3d6b3ce48e1f4b5bebd16de3f13" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 16791552, "records": [ { "name": "transformer.h.23.mlp.c_fc.weight", "shape": [ 4096, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "transformer.h.23.mlp.c_fc.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8388608 }, { "name": "transformer.h.23.mlp.c_proj.weight", "shape": [ 1024, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 8396800 }, { "name": "transformer.h.23.mlp.c_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16785408 }, { "name": "transformer.ln_f.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16787456 }, { "name": "transformer.ln_f.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16789504 } ], "md5sum": "5529160aa774a49a742e440f2113dbd1" } ] }