{ "metadata": { "ParamSize": 149, "ParamBytes": 326074368.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 77194752, "records": [ { "name": "lm_head.weight", "shape": [ 50257, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 77194752, "byteOffset": 0 } ], "md5sum": "8aa629e8739ff337f4983e485d0a145e" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 77194752, "records": [ { "name": "transformer.wte.weight", "shape": [ 50257, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 77194752, "byteOffset": 0 } ], "md5sum": "8aa629e8739ff337f4983e485d0a145e" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33470976, "records": [ { "name": "transformer.wpe.weight", "shape": [ 1024, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 0 }, { "name": "transformer.h.0.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1572864 }, { "name": "transformer.h.0.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1574400 }, { "name": "transformer.h.0.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 1575936 }, { "name": "transformer.h.0.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 5114880 }, { "name": "transformer.h.0.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 5119488 }, { "name": "transformer.h.0.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6299136 }, { "name": "transformer.h.0.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6300672 }, { "name": "transformer.h.0.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 6302208 }, { "name": "transformer.h.0.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 6303744 }, { "name": "transformer.h.0.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 11022336 }, { "name": "transformer.h.0.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 11028480 }, { "name": "transformer.h.0.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15747072 }, { "name": "transformer.h.1.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15748608 }, { "name": "transformer.h.1.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15750144 }, { "name": "transformer.h.1.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15751680 }, { "name": "transformer.h.1.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 19290624 }, { "name": "transformer.h.1.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 19295232 }, { "name": "transformer.h.1.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 20474880 }, { "name": "transformer.h.1.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 20476416 }, { "name": "transformer.h.1.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 20477952 }, { "name": "transformer.h.1.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 20479488 }, { "name": "transformer.h.1.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25198080 }, { "name": "transformer.h.1.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25204224 }, { "name": "transformer.h.1.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29922816 }, { "name": "transformer.h.2.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29924352 }, { "name": "transformer.h.2.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29925888 }, { "name": "transformer.h.2.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 29927424 }, { "name": "transformer.h.2.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33466368 } ], "md5sum": "748d75aca928d18aac58d8f9301c3bcb" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29535744, "records": [ { "name": "transformer.h.2.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "transformer.h.2.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1179648 }, { "name": "transformer.h.2.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1181184 }, { "name": "transformer.h.2.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1182720 }, { "name": "transformer.h.2.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 1184256 }, { "name": "transformer.h.2.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 5902848 }, { "name": "transformer.h.2.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 5908992 }, { "name": "transformer.h.2.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10627584 }, { "name": "transformer.h.3.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10629120 }, { "name": "transformer.h.3.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10630656 }, { "name": "transformer.h.3.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 10632192 }, { "name": "transformer.h.3.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 14171136 }, { "name": "transformer.h.3.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 14175744 }, { "name": "transformer.h.3.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15355392 }, { "name": "transformer.h.3.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15356928 }, { "name": "transformer.h.3.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 15358464 }, { "name": "transformer.h.3.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15360000 }, { "name": "transformer.h.3.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 20078592 }, { "name": "transformer.h.3.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 20084736 }, { "name": "transformer.h.3.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24803328 }, { "name": "transformer.h.4.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24804864 }, { "name": "transformer.h.4.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24806400 }, { "name": "transformer.h.4.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 24807936 }, { "name": "transformer.h.4.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 28346880 }, { "name": "transformer.h.4.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 28351488 }, { "name": "transformer.h.4.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29531136 }, { "name": "transformer.h.4.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29532672 }, { "name": "transformer.h.4.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29534208 } ], "md5sum": "94d1cea00ea6a1492cb7bb23eb5d64b5" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33076224, "records": [ { "name": "transformer.h.4.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.4.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 4718592 }, { "name": "transformer.h.4.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 4724736 }, { "name": "transformer.h.4.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9443328 }, { "name": "transformer.h.5.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9444864 }, { "name": "transformer.h.5.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9446400 }, { "name": "transformer.h.5.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 9447936 }, { "name": "transformer.h.5.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 12986880 }, { "name": "transformer.h.5.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 12991488 }, { "name": "transformer.h.5.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14171136 }, { "name": "transformer.h.5.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14172672 }, { "name": "transformer.h.5.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14174208 }, { "name": "transformer.h.5.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 14175744 }, { "name": "transformer.h.5.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18894336 }, { "name": "transformer.h.5.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 18900480 }, { "name": "transformer.h.5.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23619072 }, { "name": "transformer.h.6.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23620608 }, { "name": "transformer.h.6.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23622144 }, { "name": "transformer.h.6.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 23623680 }, { "name": "transformer.h.6.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 27162624 }, { "name": "transformer.h.6.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 27167232 }, { "name": "transformer.h.6.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28346880 }, { "name": "transformer.h.6.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28348416 }, { "name": "transformer.h.6.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28349952 }, { "name": "transformer.h.6.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28351488 }, { "name": "transformer.h.6.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33070080 } ], "md5sum": "43957cf0e0da3bc016aa35099e8a4e53" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33074688, "records": [ { "name": "transformer.h.6.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.6.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4718592 }, { "name": "transformer.h.7.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4720128 }, { "name": "transformer.h.7.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4721664 }, { "name": "transformer.h.7.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 4723200 }, { "name": "transformer.h.7.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 8262144 }, { "name": "transformer.h.7.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 8266752 }, { "name": "transformer.h.7.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9446400 }, { "name": "transformer.h.7.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9447936 }, { "name": "transformer.h.7.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9449472 }, { "name": "transformer.h.7.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9451008 }, { "name": "transformer.h.7.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14169600 }, { "name": "transformer.h.7.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 14175744 }, { "name": "transformer.h.7.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18894336 }, { "name": "transformer.h.8.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18895872 }, { "name": "transformer.h.8.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18897408 }, { "name": "transformer.h.8.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 18898944 }, { "name": "transformer.h.8.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 22437888 }, { "name": "transformer.h.8.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 22442496 }, { "name": "transformer.h.8.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23622144 }, { "name": "transformer.h.8.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23623680 }, { "name": "transformer.h.8.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23625216 }, { "name": "transformer.h.8.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 23626752 }, { "name": "transformer.h.8.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28345344 }, { "name": "transformer.h.8.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28351488 }, { "name": "transformer.h.8.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33070080 }, { "name": "transformer.h.9.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33071616 }, { "name": "transformer.h.9.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33073152 } ], "md5sum": "4cbbe2cce00b37c72cd95d22b3aa03d3" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33079296, "records": [ { "name": "transformer.h.9.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 0 }, { "name": "transformer.h.9.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 3538944 }, { "name": "transformer.h.9.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 3543552 }, { "name": "transformer.h.9.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4723200 }, { "name": "transformer.h.9.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4724736 }, { "name": "transformer.h.9.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4726272 }, { "name": "transformer.h.9.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 4727808 }, { "name": "transformer.h.9.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9446400 }, { "name": "transformer.h.9.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9452544 }, { "name": "transformer.h.9.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14171136 }, { "name": "transformer.h.10.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14172672 }, { "name": "transformer.h.10.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14174208 }, { "name": "transformer.h.10.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 14175744 }, { "name": "transformer.h.10.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 17714688 }, { "name": "transformer.h.10.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 17719296 }, { "name": "transformer.h.10.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18898944 }, { "name": "transformer.h.10.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18900480 }, { "name": "transformer.h.10.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18902016 }, { "name": "transformer.h.10.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 18903552 }, { "name": "transformer.h.10.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23622144 }, { "name": "transformer.h.10.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 23628288 }, { "name": "transformer.h.10.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28346880 }, { "name": "transformer.h.11.ln_1.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28348416 }, { "name": "transformer.h.11.ln_1.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28349952 }, { "name": "transformer.h.11.attn.c_attn.weight", "shape": [ 2304, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 28351488 }, { "name": "transformer.h.11.attn.c_attn.bias", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 31890432 }, { "name": "transformer.h.11.attn.c_proj.weight", "shape": [ 768, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 31895040 }, { "name": "transformer.h.11.attn.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33074688 }, { "name": "transformer.h.11.ln_2.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33076224 }, { "name": "transformer.h.11.ln_2.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33077760 } ], "md5sum": "4f1aa298f8b0af477effc227d22f5ef1" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 9447936, "records": [ { "name": "transformer.h.11.mlp.c_fc.weight", "shape": [ 3072, 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.11.mlp.c_fc.bias", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 4718592 }, { "name": "transformer.h.11.mlp.c_proj.weight", "shape": [ 768, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 4724736 }, { "name": "transformer.h.11.mlp.c_proj.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9443328 }, { "name": "transformer.ln_f.weight", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9444864 }, { "name": "transformer.ln_f.bias", "shape": [ 768 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9446400 } ], "md5sum": "1532478e2b064b31091ff718a8c67188" } ] }