{ "metadata": { "ParamSize": 149, "ParamBytes": 435566592.0, "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 46881792, "records": [ { "name": "embeddings.word_embeddings.weight", "shape": [ 30522, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 46881792, "byteOffset": 0 } ], "md5sum": "2eeaa439340fec525d791ca37a3dd753" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 30332928, "records": [ { "name": "embeddings.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 0 }, { "name": "embeddings.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 1536 }, { "name": "embeddings.position_embeddings.weight", "shape": [ 512, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 3072 }, { "name": "embeddings.token_type_embeddings.weight", "shape": [ 2, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3072, "byteOffset": 789504 }, { "name": "encoder.layer.0.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 792576 }, { "name": "encoder.layer.0.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 794112 }, { "name": "encoder.layer.0.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 795648 }, { "name": "encoder.layer.0.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 797184 }, { "name": "encoder.layer.0.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 1976832 }, { "name": "encoder.layer.0.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 1981440 }, { "name": "encoder.layer.0.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 5520384 }, { "name": "encoder.layer.0.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 5526528 }, { "name": "encoder.layer.0.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10245120 }, { "name": "encoder.layer.0.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10246656 }, { "name": "encoder.layer.0.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 10248192 }, { "name": "encoder.layer.0.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 10249728 }, { "name": "encoder.layer.1.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14968320 }, { "name": "encoder.layer.1.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14969856 }, { "name": "encoder.layer.1.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14971392 }, { "name": "encoder.layer.1.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 14972928 }, { "name": "encoder.layer.1.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 16152576 }, { "name": "encoder.layer.1.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 16157184 }, { "name": "encoder.layer.1.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19696128 }, { "name": "encoder.layer.1.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 19702272 }, { "name": "encoder.layer.1.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24420864 }, { "name": "encoder.layer.1.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24422400 }, { "name": "encoder.layer.1.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24423936 }, { "name": "encoder.layer.1.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 24425472 }, { "name": "encoder.layer.10.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29144064 }, { "name": "encoder.layer.10.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29145600 }, { "name": "encoder.layer.10.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 29147136 }, { "name": "encoder.layer.10.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 29148672 }, { "name": "encoder.layer.10.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 30328320 } ], "md5sum": "d19301ea1b244630109761e9a47e8c0f" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 31896576, "records": [ { "name": "encoder.layer.10.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 0 }, { "name": "encoder.layer.10.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 3538944 }, { "name": "encoder.layer.10.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 3545088 }, { "name": "encoder.layer.10.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 8263680 }, { "name": "encoder.layer.10.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 8265216 }, { "name": "encoder.layer.10.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 8266752 }, { "name": "encoder.layer.10.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8268288 }, { "name": "encoder.layer.11.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 12986880 }, { "name": "encoder.layer.11.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 12988416 }, { "name": "encoder.layer.11.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 12989952 }, { "name": "encoder.layer.11.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 12991488 }, { "name": "encoder.layer.11.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 14171136 }, { "name": "encoder.layer.11.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 14175744 }, { "name": "encoder.layer.11.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17714688 }, { "name": "encoder.layer.11.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17720832 }, { "name": "encoder.layer.11.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22439424 }, { "name": "encoder.layer.11.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22440960 }, { "name": "encoder.layer.11.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 22442496 }, { "name": "encoder.layer.11.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 22444032 }, { "name": "encoder.layer.2.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27162624 }, { "name": "encoder.layer.2.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27164160 }, { "name": "encoder.layer.2.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27165696 }, { "name": "encoder.layer.2.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 27167232 }, { "name": "encoder.layer.2.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 28346880 }, { "name": "encoder.layer.2.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 28351488 }, { "name": "encoder.layer.2.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 31890432 } ], "md5sum": "97adefcd2277d459f53c9bf2d25bf264" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33074688, "records": [ { "name": "encoder.layer.2.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "encoder.layer.2.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4718592 }, { "name": "encoder.layer.2.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4720128 }, { "name": "encoder.layer.2.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4721664 }, { "name": "encoder.layer.2.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 4723200 }, { "name": "encoder.layer.3.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9441792 }, { "name": "encoder.layer.3.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9443328 }, { "name": "encoder.layer.3.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9444864 }, { "name": "encoder.layer.3.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9446400 }, { "name": "encoder.layer.3.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10626048 }, { "name": "encoder.layer.3.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 10630656 }, { "name": "encoder.layer.3.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 14169600 }, { "name": "encoder.layer.3.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 14175744 }, { "name": "encoder.layer.3.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18894336 }, { "name": "encoder.layer.3.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18895872 }, { "name": "encoder.layer.3.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18897408 }, { "name": "encoder.layer.3.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 18898944 }, { "name": "encoder.layer.4.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23617536 }, { "name": "encoder.layer.4.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23619072 }, { "name": "encoder.layer.4.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23620608 }, { "name": "encoder.layer.4.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 23622144 }, { "name": "encoder.layer.4.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 24801792 }, { "name": "encoder.layer.4.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 24806400 }, { "name": "encoder.layer.4.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 28345344 }, { "name": "encoder.layer.4.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28351488 }, { "name": "encoder.layer.4.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33070080 }, { "name": "encoder.layer.4.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33071616 }, { "name": "encoder.layer.4.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33073152 } ], "md5sum": "e9dd727b06f09c2a5284809b08e3eff9" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33074688, "records": [ { "name": "encoder.layer.4.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "encoder.layer.5.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4718592 }, { "name": "encoder.layer.5.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4720128 }, { "name": "encoder.layer.5.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4721664 }, { "name": "encoder.layer.5.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 4723200 }, { "name": "encoder.layer.5.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 5902848 }, { "name": "encoder.layer.5.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 5907456 }, { "name": "encoder.layer.5.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 9446400 }, { "name": "encoder.layer.5.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9452544 }, { "name": "encoder.layer.5.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14171136 }, { "name": "encoder.layer.5.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14172672 }, { "name": "encoder.layer.5.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14174208 }, { "name": "encoder.layer.5.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 14175744 }, { "name": "encoder.layer.6.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18894336 }, { "name": "encoder.layer.6.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18895872 }, { "name": "encoder.layer.6.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18897408 }, { "name": "encoder.layer.6.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 18898944 }, { "name": "encoder.layer.6.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 20078592 }, { "name": "encoder.layer.6.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 20083200 }, { "name": "encoder.layer.6.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 23622144 }, { "name": "encoder.layer.6.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 23628288 }, { "name": "encoder.layer.6.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28346880 }, { "name": "encoder.layer.6.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28348416 }, { "name": "encoder.layer.6.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28349952 }, { "name": "encoder.layer.6.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28351488 }, { "name": "encoder.layer.7.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33070080 }, { "name": "encoder.layer.7.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33071616 }, { "name": "encoder.layer.7.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 33073152 } ], "md5sum": "9fbad31fed16b934acb1158c464c9774" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33080832, "records": [ { "name": "encoder.layer.7.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "encoder.layer.7.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 1179648 }, { "name": "encoder.layer.7.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 1184256 }, { "name": "encoder.layer.7.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 4723200 }, { "name": "encoder.layer.7.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 4729344 }, { "name": "encoder.layer.7.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9447936 }, { "name": "encoder.layer.7.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9449472 }, { "name": "encoder.layer.7.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9451008 }, { "name": "encoder.layer.7.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 9452544 }, { "name": "encoder.layer.8.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14171136 }, { "name": "encoder.layer.8.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14172672 }, { "name": "encoder.layer.8.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14174208 }, { "name": "encoder.layer.8.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 14175744 }, { "name": "encoder.layer.8.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 15355392 }, { "name": "encoder.layer.8.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 15360000 }, { "name": "encoder.layer.8.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 18898944 }, { "name": "encoder.layer.8.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 18905088 }, { "name": "encoder.layer.8.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23623680 }, { "name": "encoder.layer.8.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23625216 }, { "name": "encoder.layer.8.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23626752 }, { "name": "encoder.layer.8.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 23628288 }, { "name": "encoder.layer.9.attention.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28346880 }, { "name": "encoder.layer.9.attention.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28348416 }, { "name": "encoder.layer.9.attention.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 28349952 }, { "name": "encoder.layer.9.attention.output.dense.weight", "shape": [ 768, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 28351488 }, { "name": "encoder.layer.9.attention.self.qkv.bias", "shape": [ 2304 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 29531136 }, { "name": "encoder.layer.9.attention.self.qkv.weight", "shape": [ 2304, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 3538944, "byteOffset": 29535744 }, { "name": "encoder.layer.9.intermediate.dense.bias", "shape": [ 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33074688 } ], "md5sum": "7a49f1f4cfd562bb32cb568fc622b50e" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 9441792, "records": [ { "name": "encoder.layer.9.intermediate.dense.weight", "shape": [ 3072, 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "encoder.layer.9.output.LayerNorm.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4718592 }, { "name": "encoder.layer.9.output.LayerNorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4720128 }, { "name": "encoder.layer.9.output.dense.bias", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4721664 }, { "name": "encoder.layer.9.output.dense.weight", "shape": [ 768, 3072 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 4723200 } ], "md5sum": "2d4a5c41b3ca6eab2b402b5731cbc3cf" } ] }