diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3246 @@ +{ + "metadata": { + "ParamSize": 293, + "ParamBytes": 812572672.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 102926336, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 50257, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 102926336, + "byteOffset": 0 + } + ], + "md5sum": "eec32f50280ffcd1a236c7e7dbd83cb6" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 102926336, + "records": [ + { + "name": "transformer.wte.weight", + "shape": [ + 50257, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 102926336, + "byteOffset": 0 + } + ], + "md5sum": "eec32f50280ffcd1a236c7e7dbd83cb6" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 27293696, + "records": [ + { + "name": "transformer.wpe.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "transformer.h.0.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2097152 + }, + { + "name": "transformer.h.0.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2099200 + }, + { + "name": "transformer.h.0.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 2101248 + }, + { + "name": "transformer.h.0.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 8392704 + }, + { + "name": "transformer.h.0.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8398848 + }, + { + "name": "transformer.h.0.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10496000 + }, + { + "name": "transformer.h.0.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10498048 + }, + { + "name": "transformer.h.0.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10500096 + }, + { + "name": "transformer.h.0.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10502144 + }, + { + "name": "transformer.h.0.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18890752 + }, + { + "name": "transformer.h.0.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 18898944 + }, + { + "name": "transformer.h.0.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27287552 + }, + { + "name": "transformer.h.1.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27289600 + }, + { + "name": "transformer.h.1.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27291648 + } + ], + "md5sum": "112a664a1110ab79703ac38f87b1fe8f" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31490048, + "records": [ + { + "name": "transformer.h.1.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 0 + }, + { + "name": "transformer.h.1.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 6291456 + }, + { + "name": "transformer.h.1.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 6297600 + }, + { + "name": "transformer.h.1.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8394752 + }, + { + "name": "transformer.h.1.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.1.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8398848 + }, + { + "name": "transformer.h.1.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8400896 + }, + { + "name": "transformer.h.1.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.1.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16797696 + }, + { + "name": "transformer.h.1.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.2.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.2.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + }, + { + "name": "transformer.h.2.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 25192448 + }, + { + "name": "transformer.h.2.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31483904 + } + ], + "md5sum": "c8425e929b9cf76cb3643623b218aae3" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 27295744, + "records": [ + { + "name": "transformer.h.2.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 0 + }, + { + "name": "transformer.h.2.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2097152 + }, + { + "name": "transformer.h.2.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2099200 + }, + { + "name": "transformer.h.2.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 2101248 + }, + { + "name": "transformer.h.2.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 2103296 + }, + { + "name": "transformer.h.2.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 10491904 + }, + { + "name": "transformer.h.2.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 10500096 + }, + { + "name": "transformer.h.2.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18888704 + }, + { + "name": "transformer.h.3.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18890752 + }, + { + "name": "transformer.h.3.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 18892800 + }, + { + "name": "transformer.h.3.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 18894848 + }, + { + "name": "transformer.h.3.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.3.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 25192448 + }, + { + "name": "transformer.h.3.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27289600 + }, + { + "name": "transformer.h.3.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27291648 + }, + { + "name": "transformer.h.3.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27293696 + } + ], + "md5sum": "116731931ee1771b82ec10524a88cf3c" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.3.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.3.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.3.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.3.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.4.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.4.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.4.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.4.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.4.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.4.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.4.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.4.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "fb5c75c0b282fef20b386c538b4c7c3c" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.4.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.4.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.4.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.4.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.5.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.5.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.5.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.5.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.5.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.5.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.5.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.5.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "ea4bcbd38e010438180664d3501567da" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.5.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.5.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.5.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.5.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.6.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.6.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.6.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.6.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.6.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.6.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.6.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.6.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "ef7889578a63dab6c66b4c070159d182" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.6.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.6.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.6.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.6.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.7.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.7.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.7.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.7.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.7.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.7.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.7.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.7.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "7d3b89d1dccc117d0b7ad85fc5edd64c" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.7.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.7.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.7.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.7.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.8.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.8.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.8.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.8.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.8.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.8.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.8.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.8.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "52a567337e48aae8179a77c111464d1d" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.8.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.8.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.8.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.8.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.9.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.9.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.9.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.9.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.9.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.9.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.9.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.9.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "ef47f0009672c223059f783172e0f815" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.9.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.9.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.9.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.9.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.10.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.10.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.10.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.10.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.10.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.10.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.10.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.10.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "fdf1efa1d84efed4dc338323e5693d04" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.10.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.10.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.10.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.10.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.11.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.11.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.11.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.11.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.11.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.11.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.11.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.11.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "7926ce1b2972cc186b5d2c1470acfd37" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.11.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.11.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.11.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.11.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.12.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.12.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.12.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.12.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.12.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.12.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.12.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.12.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "3b05c244b0d40fb843b1de4fffbf15dd" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.12.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.12.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.12.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.12.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.13.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.13.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.13.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.13.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.13.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.13.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.13.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.13.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "bcb2f7f911485a1452aa000e2e2c0e9b" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.13.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.13.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.13.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.13.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.14.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.14.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.14.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.14.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.14.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.14.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.14.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.14.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "4722a67d2e51a955ef61a5d03c3bcf94" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.14.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.14.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.14.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.14.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.15.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.15.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.15.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.15.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.15.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.15.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.15.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.15.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "35ff9c86676c975dbf63b9912ab81f33" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.15.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.15.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.15.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.15.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.16.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.16.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.16.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.16.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.16.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.16.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.16.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.16.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "01df1b9923d63e9d0f9a560804782440" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.16.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.16.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.16.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.16.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.17.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.17.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.17.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.17.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.17.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.17.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.17.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.17.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "92f106c3b756a5a5e926e43fdb5741f1" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.17.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.17.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.17.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.17.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.18.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.18.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.18.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.18.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.18.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.18.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.18.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.18.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "983dbeeab7f0141aa067be28562ea043" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.18.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.18.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.18.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.18.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.19.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.19.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.19.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.19.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.19.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.19.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.19.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.19.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "1057439dfb17bfcc20ba72d11a21434f" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.19.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.19.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.19.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.19.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.20.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.20.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.20.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.20.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.20.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.20.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.20.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.20.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "e45f78377a8fdd0c1f43506acd580d6e" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.20.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.20.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.20.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.20.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.21.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.21.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.21.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.21.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.21.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.21.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.21.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.21.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "ba1df963ea81fc211f28da7fee9a9165" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.21.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.21.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.21.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.21.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.22.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.22.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.22.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.22.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.22.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.22.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.22.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.22.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "e25144d1411e5b0a29c5c8d9c35dd029" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "transformer.h.22.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.22.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.22.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.22.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.h.23.ln_1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.h.23.ln_1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "transformer.h.23.attn.c_attn.weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16791552 + }, + { + "name": "transformer.h.23.attn.c_attn.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23083008 + }, + { + "name": "transformer.h.23.attn.c_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 23089152 + }, + { + "name": "transformer.h.23.attn.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25186304 + }, + { + "name": "transformer.h.23.ln_2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25188352 + }, + { + "name": "transformer.h.23.ln_2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "9c68c3d6b3ce48e1f4b5bebd16de3f13" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 16791552, + "records": [ + { + "name": "transformer.h.23.mlp.c_fc.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "transformer.h.23.mlp.c_fc.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8388608 + }, + { + "name": "transformer.h.23.mlp.c_proj.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "transformer.h.23.mlp.c_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "transformer.ln_f.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "transformer.ln_f.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + } + ], + "md5sum": "5529160aa774a49a742e440f2113dbd1" + } + ] +} \ No newline at end of file