{ "metadata": { "ParamSize": 867, "ParamBytes": 3198013908.0, "BitsPerParam": 3.62205357303362 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 52841472, "records": [ { "name": "language_model.lm_head.q_weight", "shape": [ 412, 32064 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52841472, "byteOffset": 0 } ], "md5sum": "84586fb03baf274ab84216d4210df870" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "afd67ee58f41becaaa6253875f403836" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 18087936, "records": [ { "name": "language_model.model.layers.23.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 0 } ], "md5sum": "64148bf5de51a4329541b84403fe6848" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 31513984, "records": [ { "name": "language_model.lm_head.q_scale", "shape": [ 103, 32064 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6605184, "byteOffset": 0 }, { "name": "language_model.model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6605184 }, { "name": "language_model.model.layers.22.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 6613376 }, { "name": "language_model.model.layers.22.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 24701312 }, { "name": "language_model.model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 26962304 }, { "name": "language_model.model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31497600 }, { "name": "language_model.model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 31505792 } ], "md5sum": "2e46b3c9297a05221339f4bce9b6ff2f" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "b0bb14b68930b739d51e0c259c04ea95" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29586432, "records": [ { "name": "language_model.model.layers.23.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "language_model.model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 2260992 }, { "name": "language_model.model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6796288 }, { "name": "language_model.model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 6804480 }, { "name": "language_model.model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27055104 } ], "md5sum": "0dd50843bf49f2c7fffbdf89a8e1815b" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "2965bbf59e3becf0b754f36d41c6defd" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "858243d37e53ace4418aa82068616c1c" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.23.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.23.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.24.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.24.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "6c3282b551bc8542707fb8a377f3f20a" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "5aba10f2118008807ca368ea3a237961" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.24.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.25.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.25.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "cae14fff83a6c849e66dbdc430ed8b7d" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "9d5b7ed5d6e3e0a089fbeb0084e205e9" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "320f00dc6c453b6b95fc212b17fde4a5" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "5ca33785808174c74632a9683fc8279c" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.25.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.25.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.26.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.26.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "9f512026f3ba68b069fa27a1d4575937" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "adc4f43774e2345c95f46a91433538ba" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.26.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.27.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.27.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "8e28aaddaef44482ed75695a368dca54" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "ddd9d835386f3de49245e450f80db04c" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "12d99077498df359756789870e2e59e6" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "4240db178db1b855bced821195303e09" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.27.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.27.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.28.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.28.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "28831ab4ab13fd9dcf9f3109a9cb00eb" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "19737e3b4edd297bf71808e686603699" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.28.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.28.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.29.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.29.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "745f78ebdd457563839d50bc0ea39723" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "0f7e4e59a1569bab66b1099800689197" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "d29ba45c8a3b5bc6e551a560eac65de0" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "bda71624d94172d425205b540e405928" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.29.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.29.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.30.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.30.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "eaec2a9c00d542f984b1270100e16e71" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "5ec7f96904ffca4c09f4ef370f5c5eb4" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.30.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.31.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.31.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "bc24847b934b064ebeb077db72853b3a" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "00210150b070126fac197899ed07f02c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 52841472, "records": [ { "name": "language_model.model.embed_tokens.q_weight", "shape": [ 32064, 412 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 52841472, "byteOffset": 0 } ], "md5sum": "b5ec2ae494d69c772ae124fb0c18cc75" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 32303488, "records": [ { "name": "language_model.model.layers.31.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.31.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.embed_tokens.q_scale", "shape": [ 32064, 103 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6605184, "byteOffset": 7602176 }, { "name": "language_model.model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14207360 }, { "name": "language_model.model.layers.0.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 14215552 } ], "md5sum": "296efbe1e60eeccc031937a0bccd984d" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "968ce20f6c2d3507ab5ef8e8af5ac8ce" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29586432, "records": [ { "name": "language_model.model.layers.0.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "language_model.model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 2260992 }, { "name": "language_model.model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6796288 }, { "name": "language_model.model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 6804480 }, { "name": "language_model.model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27055104 } ], "md5sum": "5b8a676c66844993602e11f2fa174d36" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "4c8f32cefbd14740b3e040cba2bc464a" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "f2e53ef99b3d90b8f9fbc5206312607a" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.0.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.0.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.1.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.1.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "33cb2c86114298eb22c62b6c53c13541" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 18087936, "records": [ { "name": "language_model.model.layers.2.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 0 } ], "md5sum": "80e5985c0ead988c62c8e97f9368da27" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 32915456, "records": [ { "name": "language_model.model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.1.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 10125312 }, { "name": "language_model.model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 30375936 }, { "name": "language_model.model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32907264 } ], "md5sum": "12b18ffaa04c58a32ca2e3d95b5b2ae1" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "a261a2e7fb3b8898d0742caa9da5cd81" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29586432, "records": [ { "name": "language_model.model.layers.2.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 0 }, { "name": "language_model.model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 2260992 }, { "name": "language_model.model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6796288 }, { "name": "language_model.model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 6804480 }, { "name": "language_model.model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27055104 } ], "md5sum": "c7fca225f0f994560a3314f5a484f277" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "4327fe72304bc4604d83ab3b9d7310ea" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "03716c16f46e959e579ae88f445b6aee" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.2.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.2.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.3.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.3.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "d3165ac3f44eff0cf2b7be2945a9995f" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "8ed449cf1dd5d62a6ca127e055260714" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.3.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.4.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.4.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "a955140d589171509af6b8b20eb1274c" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "817f16b91cd6c0c3c94c6fa134d81a14" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "b6d99b9f1ed12f60ada440d465ed940d" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "9245dfc619dd8ecbb8826af72c24447f" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.4.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.4.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.5.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.5.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "49cfe0a02c39da348644384b365377b3" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "a708d8eb035a14f04d48a18d39ebbd97" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.5.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.6.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.6.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "d855e9320ab4c99b60a115924b5cde9e" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "b51bfdf6cab8d1c41df5bb28339ec840" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "8f93557b1fc8b44bda42fb1bc017f5bf" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "cdc4402c1b375e2bc0e23311e8cc4149" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.6.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.6.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.7.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.7.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "1fb4c3389fad180ace88d730cd6f670a" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "650323dcfd69f931bce73c0832800898" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.7.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.8.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.8.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "6c6a2ad9c1a7d6c6f7f08e460e535c6b" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "e026cca7771d92ae14a82d235bb41300" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "b73a429a22af62734d63cb663fdb91dc" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "6e807fd999ed090c41ea14dec5b8cb8f" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.8.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.8.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.9.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.9.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "3e36c7e3c2ac27d5484ee945f81ad9d5" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 32663252, "records": [ { "name": "language_model.model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.9.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "multi_modal_projector.linear_1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "multi_modal_projector.linear_1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 10133504 }, { "name": "multi_modal_projector.linear_1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 11837440 }, { "name": "multi_modal_projector.linear_2.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12050432 }, { "name": "multi_modal_projector.linear_2.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 12058624 }, { "name": "multi_modal_projector.linear_2.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 18808832 }, { "name": "vision_tower.vision_model.embeddings.class_embedding", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19652608 }, { "name": "vision_tower.vision_model.embeddings.patch_embedding.weight", "shape": [ 1024, 3, 14, 14 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1204224, "byteOffset": 19654656 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_weight", "shape": [ 577, 104 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 240032, "byteOffset": 20858880 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_scale", "shape": [ 577, 26 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 30004, "byteOffset": 21098912 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21128916 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21130964 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21133012 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21135060 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21137108 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 21145300 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 22849236 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23062228 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 23064276 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 24751828 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24962772 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 24964820 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 25390804 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25444052 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 25446100 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 25872084 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25925332 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 25927380 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 26353364 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26406612 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 26408660 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 26834644 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26887892 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26889940 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26891988 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26894036 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26896084 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 26904276 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 28608212 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 28821204 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 28823252 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 30510804 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 30721748 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 30723796 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 31149780 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 31203028 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 31205076 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 31631060 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 31684308 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 31686356 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 32112340 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 32165588 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 32167636 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 32593620 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 32646868 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 32648916 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 32650964 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 32653012 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32655060 } ], "md5sum": "49285adf00fa5d96587b32ee49347761" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 33521664, "records": [ { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 1703936 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 1916928 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 1918976 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 3606528 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 3817472 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3819520 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 4245504 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 4298752 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 4300800 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 4726784 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 4780032 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 4782080 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 5208064 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 5261312 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 5263360 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 5689344 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 5742592 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 5744640 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 5746688 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 5748736 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 5750784 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 5758976 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 7462912 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 7675904 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 7677952 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 9365504 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 9576448 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 9578496 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 10004480 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10057728 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 10059776 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 10485760 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10539008 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 10541056 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 10967040 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 11020288 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11022336 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 11448320 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 11501568 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 11503616 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 11505664 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 11507712 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 11509760 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 11517952 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 13221888 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 13434880 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 13436928 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 15124480 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 15335424 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 15337472 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 15763456 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 15816704 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 15818752 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 16244736 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16297984 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 16300032 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 16726016 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16779264 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 16781312 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 17207296 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17260544 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17262592 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17264640 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17266688 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17268736 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 17276928 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 18980864 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19193856 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 19195904 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 20883456 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21094400 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 21096448 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 21522432 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21575680 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 21577728 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 22003712 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 22056960 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 22059008 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 22484992 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 22538240 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 22540288 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 22966272 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23019520 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23021568 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23023616 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23025664 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 23027712 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 23035904 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 24739840 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24952832 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 24954880 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 26642432 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26853376 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 26855424 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 27281408 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27334656 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 27336704 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 27762688 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27815936 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 27817984 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 28243968 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 28297216 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 28299264 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 28725248 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 28778496 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 28780544 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 28782592 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 28784640 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 28786688 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 28794880 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 30498816 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 30711808 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 30713856 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 32401408 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 32612352 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 32614400 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 33040384 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 33093632 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 33095680 } ], "md5sum": "ad5ee18d8ea518a57bf5f83caf74ea42" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33433600, "records": [ { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 53248 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 55296 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 481280 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 534528 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 536576 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 962560 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 1015808 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 1017856 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 1019904 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 1021952 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 1024000 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 1032192 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 2736128 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2949120 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 2951168 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 4638720 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 4849664 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 4851712 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 5277696 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 5330944 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 5332992 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 5758976 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 5812224 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 5814272 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 6240256 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 6293504 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 6295552 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 6721536 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 6774784 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 6776832 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 6778880 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 6780928 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 6782976 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 6791168 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 8495104 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8708096 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 8710144 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 10397696 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 10608640 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 10610688 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 11036672 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 11089920 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11091968 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 11517952 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 11571200 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11573248 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 11999232 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12052480 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 12054528 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 12480512 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12533760 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12535808 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12537856 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12539904 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12541952 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 12550144 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 14254080 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14467072 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 14469120 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 16156672 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16367616 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 16369664 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 16795648 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 16848896 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 16850944 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 17276928 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17330176 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 17332224 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 17758208 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17811456 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 17813504 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 18239488 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 18292736 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 18294784 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 18296832 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 18298880 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 18300928 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 18309120 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 20013056 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 20226048 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 20228096 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 21915648 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 22126592 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 22128640 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 22554624 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 22607872 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 22609920 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 23035904 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23089152 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 23091200 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 23517184 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23570432 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 23572480 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 23998464 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24051712 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24053760 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24055808 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24057856 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24059904 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 24068096 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 25772032 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25985024 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 25987072 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 27674624 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27885568 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 27887616 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 28313600 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 28366848 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 28368896 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 28794880 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 28848128 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 28850176 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 29276160 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 29329408 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29331456 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 29757440 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 29810688 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 29812736 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 29814784 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 29816832 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29818880 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 29827072 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 31531008 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 31744000 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 31746048 } ], "md5sum": "53e33b8ce94813a71243f8b69d8cf2dd" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 32866304, "records": [ { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 210944 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 212992 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 638976 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 692224 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 694272 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 1120256 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 1173504 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 1175552 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 1601536 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 1654784 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 1656832 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 2082816 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2136064 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2138112 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2140160 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2142208 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 2144256 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 2152448 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 3856384 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 4069376 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 4071424 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 5758976 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 5969920 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 5971968 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 6397952 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 6451200 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 6453248 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 6879232 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 6932480 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 6934528 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 7360512 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 7413760 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 7415808 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 7841792 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 7895040 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 7897088 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 7899136 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 7901184 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7903232 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 7911424 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 9615360 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 9828352 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 9830400 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 11517952 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 11728896 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 11730944 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 12156928 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12210176 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 12212224 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 12638208 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 12691456 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 12693504 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 13119488 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 13172736 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 13174784 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 13600768 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 13654016 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 13656064 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 13658112 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 13660160 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 13662208 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 13670400 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 15374336 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 15587328 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 15589376 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 17276928 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17487872 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 17489920 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 17915904 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17969152 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 17971200 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 18397184 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 18450432 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18452480 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 18878464 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 18931712 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 18933760 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 19359744 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19412992 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19415040 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19417088 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19419136 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19421184 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 19429376 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 21133312 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21346304 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 21348352 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 23035904 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23246848 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 23248896 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 23674880 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23728128 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 23730176 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 24156160 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24209408 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 24211456 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 24637440 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24690688 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 24692736 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 25118720 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25171968 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25174016 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25176064 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25178112 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 25180160 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 25188352 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 26892288 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 27105280 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 27107328 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 28794880 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 29005824 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29007872 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 29433856 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 29487104 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29489152 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 29915136 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 29968384 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 29970432 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 30396416 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 30449664 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 30451712 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 30877696 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 30930944 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 30932992 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 30935040 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 30937088 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 30939136 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 30947328 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 32651264 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 32864256 } ], "md5sum": "43dcc3161c86ba5f6a30fb4b03899fa9" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 18087936, "records": [ { "name": "language_model.model.layers.10.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 0 } ], "md5sum": "93b80910a5644378a5c4a1c642391d76" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "8c3d846d21124af52575d10f9f37a46c" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 29136896, "records": [ { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 1687552 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 1898496 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 1900544 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 2326528 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2379776 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 2381824 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 2807808 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 2861056 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 2863104 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 3289088 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 3342336 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 3344384 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 3770368 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 3823616 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 3825664 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 3827712 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 3829760 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 3831808 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 3840000 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 5543936 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 5756928 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 5758976 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 7446528 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 7657472 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 7659520 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 8085504 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8138752 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 8140800 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 8566784 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 8620032 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 8622080 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 9048064 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 9101312 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 9103360 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 9529344 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 9582592 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 9584640 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 9586688 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 9588736 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9590784 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 9598976 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 11302912 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 11515904 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 11517952 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 13205504 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 13416448 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 13418496 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 13844480 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 13897728 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 13899776 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 14325760 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14379008 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 14381056 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 14807040 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 14860288 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 14862336 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 15288320 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 15341568 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 15343616 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 15345664 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 15347712 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 15349760 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 15357952 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 17061888 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 17274880 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 17276928 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 18964480 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19175424 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 19177472 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 19603456 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 19656704 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 19658752 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 20084736 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 20137984 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 20140032 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 20566016 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 20619264 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 20621312 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 21047296 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21100544 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21102592 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21104640 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 21106688 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21108736 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_weight", "shape": [ 104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1703936, "byteOffset": 21116928 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_scale", "shape": [ 26, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 212992, "byteOffset": 22820864 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 23033856 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_weight", "shape": [ 412, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1687552, "byteOffset": 23035904 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_scale", "shape": [ 103, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 210944, "byteOffset": 24723456 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 24934400 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 24936448 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 25362432 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25415680 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 25417728 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 25843712 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 25896960 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 25899008 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 26324992 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26378240 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_weight", "shape": [ 104, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 425984, "byteOffset": 26380288 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_scale", "shape": [ 26, 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 53248, "byteOffset": 26806272 }, { "name": "vision_tower.vision_model.post_layernorm.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26859520 }, { "name": "vision_tower.vision_model.post_layernorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26861568 }, { "name": "vision_tower.vision_model.pre_layrnorm.bias", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26863616 }, { "name": "vision_tower.vision_model.pre_layrnorm.weight", "shape": [ 1024 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2048, "byteOffset": 26865664 }, { "name": "language_model.model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26867712 }, { "name": "language_model.model.layers.10.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 26875904 } ], "md5sum": "c8ccfa854d6ea75398d4ca8bf0aac086" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "3b6239fe4d35089bced0fce93db7a03f" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 4543488 }, { "name": "language_model.model.layers.10.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 11293696 }, { "name": "language_model.model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 12137472 }, { "name": "language_model.model.layers.11.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 12145664 }, { "name": "language_model.model.layers.11.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 30233600 } ], "md5sum": "5b68d7b09a90d358ce928900f4de3f29" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "07be3d858d417182be8b1c2f53a26017" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "30d89472750265fdc6fac7cd84510bdb" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "7c9af48ee21e8e0992807c2fbd12802b" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.11.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.11.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.12.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.12.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "fd69db0b55c405757159b3a34176b1ab" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "83f3d095dc4dfd5ff8db9cc25eca564f" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.12.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.13.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.13.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "2004f04976a8976eacbde779b37a177c" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "4005b0e0c30e995c3fb9982eff4bbdaa" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "c64943d4d80f4b557837c469ca56a2ed" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "8f8e54e2d836bd2a0448290856405a37" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.13.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.13.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.14.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.14.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "46a5380e228265a13857b5ccfbf30a1c" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "be87bd7461e936a3fc47a5e5f76979fa" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.14.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.15.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.15.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "156420e5aee0aacebbed948b7d749ebd" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "7ae96aa7971ee29fbaaa7495766366a0" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "1aa17369d3f9a3f769bea889b4453be7" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "89eb5129c0547c741e67c230f7dfb4c5" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.15.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.15.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.16.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.16.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "fb13ebf34c4b38b92efefb47b953eb8d" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "8cd8c4bc7a88ee50c678bb7bce8f54d7" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.16.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.17.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.17.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "a85501baac065e4353ac45f88b287319" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "7314de7fbbd019a7550d5baf4931ad38" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "6178e3a176f4c88d0c86face41791dbc" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "c8c096b4d2d13926abc6eb973c7eeb7d" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.17.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.17.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.18.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.18.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "727c22bf4b66e76e4ee80fc9c98a0ddd" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "d745ad4d07825046c605294f38668d87" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.18.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.18.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.19.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.19.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "338142d936f4d748536e94976ed8a7e7" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "6eaef05417cba9e345f7c5b0c5854659" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "12fc97d4e5afaf64e5efb723d331716a" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 20250624, "records": [ { "name": "language_model.model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 0 } ], "md5sum": "ba7fa834998d132671a9001b613a2091" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 32494592, "records": [ { "name": "language_model.model.layers.19.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.19.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 7593984 }, { "name": "language_model.model.layers.20.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 7602176 }, { "name": "language_model.model.layers.20.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 25690112 }, { "name": "language_model.model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 27951104 }, { "name": "language_model.model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32486400 } ], "md5sum": "3691f59d80f37058bb862e7e15d14719" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 36282368, "records": [ { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 412, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36282368, "byteOffset": 0 } ], "md5sum": "b598acfe0991a01944acc34137b95ad3" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 30482432, "records": [ { "name": "language_model.model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 0 }, { "name": "language_model.model.layers.20.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 2531328 }, { "name": "language_model.model.layers.20.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 9281536 }, { "name": "language_model.model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10125312 }, { "name": "language_model.model.layers.21.mlp.down_proj.q_weight", "shape": [ 1104, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18087936, "byteOffset": 10133504 }, { "name": "language_model.model.layers.21.mlp.down_proj.q_scale", "shape": [ 276, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2260992, "byteOffset": 28221440 } ], "md5sum": "79be1d28e9db9b48460bc3a3076a2fdb" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 27325440, "records": [ { "name": "language_model.model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 103, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4535296, "byteOffset": 0 }, { "name": "language_model.model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 4535296 }, { "name": "language_model.model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 4543488 }, { "name": "language_model.model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 24794112 } ], "md5sum": "a81ce6aa50311d42bf57d95170923228" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 30375936, "records": [ { "name": "language_model.model.layers.21.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.21.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 }, { "name": "language_model.model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 412, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 20250624, "byteOffset": 7593984 }, { "name": "language_model.model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 103, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2531328, "byteOffset": 27844608 } ], "md5sum": "59ebfc3b18644ba012aae40f6d5f53d4" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 7593984, "records": [ { "name": "language_model.model.layers.22.self_attn.o_proj.q_weight", "shape": [ 412, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6750208, "byteOffset": 0 }, { "name": "language_model.model.layers.22.self_attn.o_proj.q_scale", "shape": [ 103, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 843776, "byteOffset": 6750208 } ], "md5sum": "bc5e78e345e0799017528ae8ad6b0281" } ] }