diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,8514 @@ +{ + "metadata": { + "ParamSize": 724, + "ParamBytes": 2769074176.0, + "BitsPerParam": 5.4723152611665915 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 49250304, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32064, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 49250304, + "byteOffset": 0 + } + ], + "md5sum": "d462bf6681cfdf3a155a33ec34995397" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.18.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "44d1346cb8ecbad2a0225b6f8e6b6601" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 23470080, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32064, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6156288, + "byteOffset": 0 + }, + { + "name": "model.h.18.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 6156288 + }, + { + "name": "model.h.18.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 6162432 + }, + { + "name": "model.h.18.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 18745344 + }, + { + "name": "model.h.18.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 20318208 + }, + { + "name": "model.h.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23463936 + } + ], + "md5sum": "d58b2bfe1a2018f50a2295a25b598203" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.19.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "23f15d529480da9a0c42195b3916270f" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.18.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.18.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.19.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.19.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.19.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.19.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "f1e852dd01f75d353a308f918d7e042b" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.19.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.19.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.19.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.19.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.20.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "d1e46423910f8ff09fa2dea997c9df8a" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.20.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "019f500de4760aacd3ee30f87cf92f36" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.20.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.20.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.20.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.20.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.20.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "0487a6f5169a8ec8731946ac438aca6f" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.21.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4bd7aa0e240a5d3fe5cc4f04f0019851" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.20.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.20.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.21.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.21.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.21.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.21.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "2d48bf11c0cfb3f138ef4e44e0bc8205" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.21.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.21.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.21.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.21.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.22.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "18c37a099b97287fa32ef12c06226d92" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.22.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6f715301d4fdbd2ed658ba3a86320f80" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.22.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.22.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.22.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.22.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.22.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "f010e04f362de8344df9efd68dff17c5" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.23.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f280ebae4a4dc4feaac07b649772118c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.22.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.22.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.23.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.23.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.23.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.23.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "d8dc46dad50b01ad80d76e32e5c4e872" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.23.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.23.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.23.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.23.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.24.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "bc99209d84c0f0887b53d173d95639dc" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.24.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f988ea3fd6c4ef0446d41f6489f8137d" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.24.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.24.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.24.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.24.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.24.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "a90dc6c7a0cd0ab22ac28efa49a37437" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.25.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0367200ee6d724634eee64500b76e9f4" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.24.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.24.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.25.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.25.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.25.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.25.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "e4e0d58bac3892d7466dc7a57d15b527" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.25.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.25.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.25.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.25.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.26.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "137ce81a2aef53ce819f2ddcba2662a9" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.26.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "57f52a6294ba07ccfd40862302abfeb0" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.26.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.26.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.26.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.26.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.26.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "fb637e3862f096624612a89e5d550586" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.27.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c4a2128471bb8163d1155d4428a3e096" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.26.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.26.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.27.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.27.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.27.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.27.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "8f46c0f65098b2f7150326e2032cad8e" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.27.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.27.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.27.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.27.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.28.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "75acd30a2b79d831f7db532a8999a1f3" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.28.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8d06da6215b2c8aa873f4c228389f389" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.28.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.28.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.28.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.28.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.28.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.28.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "4f7f06d0c1fc498bcd4128e6a62b01ba" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.29.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "dd4a9fc77672f3d4c5b29e3167fd4881" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.28.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.28.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.29.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.29.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.29.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.29.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.29.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "86a3a85df8b0d7a1237d036a46ad6a8e" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.29.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.29.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.29.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.29.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.30.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "3f10adaed93c9b7fb821bc8b1f35659e" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.30.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6ddec1d838994a89efa5c5534a00f63a" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.30.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.30.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.30.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.30.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.30.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.30.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "196b53103b396b64b908064f9984b4b7" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.31.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f913ba05408e86b44bc31f5a275e9cb1" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.30.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.30.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.31.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.31.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.31.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.31.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.31.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "0ed9f2a3baa6009111345e9fd08c0fe8" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 49250304, + "records": [ + { + "name": "model.embd.q_weight", + "shape": [ + 32064, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 49250304, + "byteOffset": 0 + } + ], + "md5sum": "ba96aa7c1453daa5716bf285fc9e0332" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 27402240, + "records": [ + { + "name": "model.h.31.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.31.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.31.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.31.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + }, + { + "name": "model.embd.q_scale", + "shape": [ + 32064, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6156288, + "byteOffset": 21239808 + }, + { + "name": "model.h.0.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 27396096 + } + ], + "md5sum": "956c42c6309a4a9e50ac3273a1dfe096" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.0.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "70ec8bdbd4f6edee811b13e8382e6d31" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.0.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.0.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.0.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.0.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.0.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "30a7c74e2dabfffe2c79084684693ff9" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.1.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "19808d9fb46a8dbe89f96ef7fcbde15b" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.0.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.0.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.1.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.1.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.1.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.1.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "e9ce9615c09f57448e2ce79382a96f14" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.1.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.1.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.1.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.1.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.10.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "fc45420316ea727605be5d7e0ed8e9da" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.10.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7b52744731cd22ca6bf33ee27302e1e3" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.10.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.10.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.10.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.10.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.10.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "7016666073af181d421de47db3cd2df4" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.11.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "da8ffafadefa5bee6e92402a262c1591" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.10.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.10.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.11.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.11.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.11.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.11.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "579ea893625576de7ec6c925c8e472a5" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.11.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.11.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.11.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.11.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.12.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "3e3a519e78b6d214c6935bf47d9fd6da" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.12.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "01263e0f52d5530fd5dcce9d6e558faf" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.12.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.12.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.12.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.12.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.12.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "df492083954171dd0f29c8347415ad4c" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.13.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "26e9da272c07b9e3164ee39204001b1a" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.12.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.12.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.13.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.13.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.13.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.13.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "515c4b3a001119843a14b1ebb186f53e" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.13.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.13.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.13.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.13.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.14.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "e852ade5c26bd91ab0fa82467a3777c6" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.14.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0adac2bada661247eab94698103f825d" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.14.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.14.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.14.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.14.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.14.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "4a35f0ddaaab6863a2c5a3e4a407a249" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.15.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9c9045251d6bd64178aee3423ba02d53" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.14.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.14.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.15.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.15.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.15.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.15.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "678735984da6a7c65a985ea9bcdd9e1d" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.15.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.15.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.15.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.15.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.16.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "6faf93f32ebae44e2316776e943b73d5" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.16.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7c906b3f2d0a23b4b404f384ada6c863" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.16.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.16.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.16.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.16.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.16.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "d5a7ef52b511ca5c6607c93718976580" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.17.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7835aa22e5fc3ab43904a7ba317dd05e" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.16.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.16.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.17.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.17.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.17.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.17.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "97c2f05f64c9839ba60e57e830c919e6" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 26548224, + "records": [ + { + "name": "model.h.17.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.17.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.17.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.17.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.18.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21233664 + }, + { + "name": "model.h.18.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 25952256 + }, + { + "name": "model.h.2.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26542080 + } + ], + "md5sum": "8f2cc14e740a795d3945a92bca16366a" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.2.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "56f2f22839c6b77063e3a90f30122137" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.2.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.2.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.2.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.2.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.2.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "f69a2221a90921e660620770b711fd17" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.3.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c6e8789cf22bfb7a983b1834e983dd6e" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.2.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.2.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.3.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.3.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.3.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.3.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "0273ecef5ee3e36de92d20e60a6091a0" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.3.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.3.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.3.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.3.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.4.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "5026939da604c97430a95d2026bb282c" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.4.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6c825a7113de38b7e100f9e7f45fc976" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.4.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.4.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.4.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.4.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.4.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "e8545eb30fe53e7a9809bcbc52e5edc7" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.5.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1bb546cc943cea45bd5a83dca1919c16" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.4.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.4.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.5.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.5.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.5.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.5.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "3c5add49f666f50d75e351c3758271f1" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.5.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.5.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.5.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.5.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.6.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "b9817824fd05540c9968991b4973535f" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.6.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2cbf2b759939a9c467cdcd7a35ba6ce7" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.6.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.6.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.6.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.6.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.6.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "3f972e64e6d289c178af439b37093a27" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.7.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a2f722107d2de12aa1644399cd04810a" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.6.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.6.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.7.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.7.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.7.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.7.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "c6eaaee1e69bfad8b25eaac488987947" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 21239808, + "records": [ + { + "name": "model.h.7.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.7.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.7.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.7.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "model.h.8.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21233664 + } + ], + "md5sum": "83155bcc2223493f6ca1afce08a7e233" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.8.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "aa3e3bbca10469c352e931114e4352e5" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 22616064, + "records": [ + { + "name": "model.h.8.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.h.8.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.h.8.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 14155776 + }, + { + "name": "model.h.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17301504 + }, + { + "name": "model.h.8.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 17307648 + }, + { + "name": "model.h.8.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 22026240 + } + ], + "md5sum": "11036296e1ab0f3fc2aad53eea717d1d" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.h.9.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "698469bf0d9056a98e7b4b95100337ce" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 33239040, + "records": [ + { + "name": "model.h.8.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 0 + }, + { + "name": "model.h.8.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 14155776 + }, + { + "name": "model.h.9.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15925248 + }, + { + "name": "model.h.9.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 15931392 + }, + { + "name": "model.h.9.mlp.down_proj.q_scale", + "shape": [ + 3072, + 256 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 28514304 + }, + { + "name": "model.h.9.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 30087168 + }, + { + "name": "model.h.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33232896 + } + ], + "md5sum": "72d7706eb3e77d679c1a21db987444f9" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 32036864, + "records": [ + { + "name": "model.h.9.mixer.out_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 0 + }, + { + "name": "model.h.9.mixer.out_proj.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 4718592 + }, + { + "name": "model.h.9.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 14155776, + "byteOffset": 5308416 + }, + { + "name": "model.h.9.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1769472, + "byteOffset": 19464192 + }, + { + "name": "vision_embed_tokens.glb_GN", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21233664 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.embeddings.class_embedding", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 21241856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.embeddings.patch_embedding.weight", + "shape": [ + 1024, + 3, + 14, + 14 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1204224, + "byteOffset": 21243904 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.embeddings.position_embedding.weight", + "shape": [ + 577, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1181696, + "byteOffset": 22448128 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23629824 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23631872 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23633920 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 23635968 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23638016 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 23646208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 32034816 + } + ], + "md5sum": "06eb2520ad6b2dfb6527b035312460c8" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "12ccc3786467895471d31bb5b1d2610c" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "dcc2adf61aa07fe189bb15874110e845" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "9f8d49d8894a05f4179ba707925e9d59" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "19b6f5d2cfc839108f9156b8b521d3ce" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "3e6322c0673873144d67964c709fa3be" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "f04370015a40ae2073f5d89ad86f1c42" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "e2c99d455ee851ede30d282481166453" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "11c75f74757caccc61b9f6c3f39a9737" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "a7f57fd25793b85aa6027a514efc39fb" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "e13228942bc433e638fe3903001715f3" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "eab0a6fc1a1e3beac991a0be01fdefe8" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "afcfd0263f305fcc63e7b86393753bf7" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "460a8bc76edf812f85886ec9f950f395" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "bcac984764bba0a4eb9891d2043e1d89" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "3197e582d745d84c2ab4347a1f2d4e2d" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "e6d67ae9f4dab5ea49fac79bceac7a86" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "1a74510bf14d6fc06b53f09ee6a22fdd" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "75a9ee23e852e5562e2176a5ece04f24" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "da3f4d64dad38aa57a47fd6fbca1a334" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "ca1eb7481171d37cc9fab4b3432d81d1" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "03be4317a6855a15625866d5dde56763" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "7464cccd529310def6e3920c637ad62c" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "8be48481c922751c9b723a99e5dcd727" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 29200384, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.post_layernorm.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.post_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.pre_layrnorm.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.pre_layrnorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_projection.linear_1.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_projection.linear_1.q_weight", + "shape": [ + 3072, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6291456, + "byteOffset": 16799744 + }, + { + "name": "vision_embed_tokens.img_projection.linear_1.q_scale", + "shape": [ + 3072, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 23091200 + }, + { + "name": "vision_embed_tokens.img_projection.linear_2.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23877632 + }, + { + "name": "vision_embed_tokens.img_projection.linear_2.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 23883776 + }, + { + "name": "vision_embed_tokens.img_projection.linear_2.q_scale", + "shape": [ + 3072, + 96 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 589824, + "byteOffset": 28602368 + }, + { + "name": "vision_embed_tokens.sub_GN", + "shape": [ + 1, + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29192192 + } + ], + "md5sum": "927d59a5c381146b42f03e5e1f514411" + } + ] +} \ No newline at end of file