diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,8386 @@ +{ + "metadata": { + "ParamSize": 724, + "ParamBytes": 2340679936.0, + "BitsPerParam": 4.625711599312261 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 39502848, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32064, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39502848, + "byteOffset": 0 + } + ], + "md5sum": "d7e1c53f4ed3388ab3651a4ac7fe5c3b" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.18.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "bbe371e4c6556993e74003d072d5a519" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 31588480, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32064, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4937856, + "byteOffset": 0 + }, + { + "name": "model.h.18.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 4937856 + }, + { + "name": "model.h.18.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 4944000 + }, + { + "name": "model.h.18.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 15020160 + }, + { + "name": "model.h.18.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 16279680 + }, + { + "name": "model.h.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 18802816 + }, + { + "name": "model.h.18.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 18808960 + }, + { + "name": "model.h.18.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 30163072 + }, + { + "name": "model.h.19.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 31582336 + } + ], + "md5sum": "8eb6f7a52bf759728f53f0245c72a859" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31520768, + "records": [ + { + "name": "model.h.19.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 0 + }, + { + "name": "model.h.19.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 10076160 + }, + { + "name": "model.h.19.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 11335680 + } + ], + "md5sum": "2ba63895330a62bc1318c87c5d5072ce" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.20.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "9456eef90ffdc61e81ffe0800109e762" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 33431552, + "records": [ + { + "name": "model.h.19.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 0 + }, + { + "name": "model.h.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 2523136 + }, + { + "name": "model.h.19.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 2529280 + }, + { + "name": "model.h.19.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 6313984 + }, + { + "name": "model.h.19.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 6787072 + }, + { + "name": "model.h.19.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 18141184 + }, + { + "name": "model.h.20.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19560448 + }, + { + "name": "model.h.20.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 19566592 + }, + { + "name": "model.h.20.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 29642752 + }, + { + "name": "model.h.20.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 30902272 + }, + { + "name": "model.h.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33425408 + } + ], + "md5sum": "cd6b9aa2b41c2140645eefb808a8a2e3" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.21.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "4ec6c96f6227fce6dbe6a251dad201f3" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.20.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.20.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.20.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.20.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.21.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.21.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.21.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.21.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "30f769a15fbdac43ca45e6812ffecbfb" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.22.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "3e20b5efdce2e340dbb820d88b3cb7e0" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.21.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.21.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.21.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.21.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.22.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.22.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.22.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.22.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "5bbad59fbf3fd94682eb75a071e0f264" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.23.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "5c710bda058fe56ce812112e1b0f0ee0" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.22.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.22.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.22.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.22.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.23.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.23.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.23.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.23.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "7a15e2962ed42b63453eaecd837ff275" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.24.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "0a10e859199899afd04e346c84ef3eef" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.23.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.23.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.23.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.23.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.24.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.24.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.24.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.24.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "6bc76013f6066d9569517976841883eb" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.25.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "f734322260c53272938e682d4441e076" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.24.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.24.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.24.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.24.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.25.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.25.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.25.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.25.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "ad714acba2104718fb5491a432b197d1" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.26.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "348d7841fb570d2dc49bbb93d3679134" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.25.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.25.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.25.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.25.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.26.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.26.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.26.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.26.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "6179ab639c1bda89b3c90cb50ae11762" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.27.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "336989816dd64c49e7eadbdea61ca170" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.26.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.26.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.26.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.26.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.27.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.27.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.27.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.27.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "da3b6c34fb6d830d6245f9c61818a068" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.28.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "0640e92aa128f4e05a58d7c5cc356006" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.27.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.27.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.27.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.27.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.28.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.28.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.28.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.28.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.28.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "db3317dfde5ff5e937017d05543c3792" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.29.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "0e9f6c20144e7d9da545ac535987d49c" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.28.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.28.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.28.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.28.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.29.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.29.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.29.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.29.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.29.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "5cd3f1298c7d3e2eba61c2dccaa51fa1" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.30.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "1d625599633f2e8fdd20ea38072c1a3a" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.29.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.29.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.29.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.29.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.30.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.30.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.30.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.30.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.30.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "430a7c4d1d69396f3ec1eb880b7f716f" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.31.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "25077b71bded2893f97467c767447e8f" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.30.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.30.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.30.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.30.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.31.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.31.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.31.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.31.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.31.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "2239b285a54e01bb80bccbf22c562d3f" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 39502848, + "records": [ + { + "name": "model.embd.q_weight", + "shape": [ + 32064, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 39502848, + "byteOffset": 0 + } + ], + "md5sum": "5e7537fcb657900a598019c4ea92dc0a" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.0.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "8409accc0efa14b9bd2212b965b6d449" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33316992, + "records": [ + { + "name": "model.h.31.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.31.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.31.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.31.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.embd.q_scale", + "shape": [ + 32064, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4937856, + "byteOffset": 17037312 + }, + { + "name": "model.h.0.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21975168 + }, + { + "name": "model.h.0.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 21981312 + }, + { + "name": "model.h.0.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 32057472 + } + ], + "md5sum": "897b4fd728d5eaf06f7e5e8eb9fff479" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.1.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "d3bc8dfb66a961ed8ff8d82d62f891e6" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 33431552, + "records": [ + { + "name": "model.h.0.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 0 + }, + { + "name": "model.h.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 2523136 + }, + { + "name": "model.h.0.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 2529280 + }, + { + "name": "model.h.0.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 6313984 + }, + { + "name": "model.h.0.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 6787072 + }, + { + "name": "model.h.0.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 18141184 + }, + { + "name": "model.h.1.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19560448 + }, + { + "name": "model.h.1.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 19566592 + }, + { + "name": "model.h.1.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 29642752 + }, + { + "name": "model.h.1.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 30902272 + }, + { + "name": "model.h.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33425408 + } + ], + "md5sum": "9fa4180ca7f3d8d2671e424df58f7d33" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.10.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "a3f57885456413befe772a61db9fe003" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.1.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.1.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.1.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.1.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.10.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.10.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.10.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.10.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "939e4d0a910d890d33ab3aa4cfae6527" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.11.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "ba8cc1009d70aa580a3c3464ddcf2e1b" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.10.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.10.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.10.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.10.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.11.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.11.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.11.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.11.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "cf73ece61d143b5685eef41c20229c42" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.12.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "62f2abbb1faa52d0bce9d55753794a88" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.11.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.11.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.11.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.11.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.12.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.12.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.12.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.12.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "d2acdc47a3bc43aa505032d2bb8c15f3" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.13.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "bb5529cdb10895082d3b2e5293f6d1cd" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.12.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.12.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.12.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.12.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.13.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.13.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.13.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.13.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "98ef5b4e63d4701a25201cdc3c05bf12" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.14.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "24522563518632d9a17f4166066cb200" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.13.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.13.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.13.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.13.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.14.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.14.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.14.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.14.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "8d3fab7847a5c022d510e83f15c23ad7" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.15.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "215dc6ff0a1f211c3727dcc991c46cf1" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.14.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.14.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.14.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.14.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.15.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.15.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.15.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.15.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "72866c136adefde27ba78274ed57619a" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.16.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "7f02d5ccf7cc2c4370777d6ae00b1244" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.15.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.15.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.15.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.15.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.16.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.16.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.16.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.16.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "30abcca65e41086ad0667aacaf53384b" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.17.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "69a2467ff910a2811ef2c4542bc4fe56" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.16.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.16.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.16.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.16.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.17.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.17.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.17.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.17.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "a22c8fb5e9fed8eeee2e25e97eebabaa" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.2.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "de1a4314be8c30ee24e9f222493b0f5b" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 32630784, + "records": [ + { + "name": "model.h.17.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.17.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.17.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.17.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.18.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 17031168 + }, + { + "name": "model.h.18.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 20815872 + }, + { + "name": "model.h.2.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 21288960 + }, + { + "name": "model.h.2.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 21295104 + }, + { + "name": "model.h.2.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 31371264 + } + ], + "md5sum": "e236c8247a12c058d052abe0ae7fc877" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.3.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "8d311f71a279bc57e82db1bfee4377e0" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 33431552, + "records": [ + { + "name": "model.h.2.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 0 + }, + { + "name": "model.h.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 2523136 + }, + { + "name": "model.h.2.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 2529280 + }, + { + "name": "model.h.2.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 6313984 + }, + { + "name": "model.h.2.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 6787072 + }, + { + "name": "model.h.2.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 18141184 + }, + { + "name": "model.h.3.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19560448 + }, + { + "name": "model.h.3.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 19566592 + }, + { + "name": "model.h.3.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 29642752 + }, + { + "name": "model.h.3.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 30902272 + }, + { + "name": "model.h.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 33425408 + } + ], + "md5sum": "64e85992e12b41d931b4135988285364" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.4.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "c9311d3be9641447bf58117d7e8c58a7" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.3.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.3.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.3.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.3.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.4.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.4.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.4.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.4.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "d621aecf6605c376c716c5740763cd54" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.5.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "d3b2599308c53564ab04eb7c13b15fad" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.4.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.4.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.4.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.4.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.5.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.5.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.5.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.5.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "efdbce3b0bdac0012a5351dde276b4cc" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.6.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "5e4f4154f7e37074b030ef2ba84f99fe" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.5.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.5.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.5.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.5.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.6.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.6.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.6.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.6.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "852443b009f7b915ac3d3b9058bd8473" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.7.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "e7e25940d67c00c1b8d30e56ea7d5fd4" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.6.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.6.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.6.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.6.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.7.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.7.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.7.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.7.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "94d1dfa1be75385702998e301115055a" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.8.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "4608f00654ceaa9953c3daccb281330b" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.7.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.7.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.7.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.7.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.8.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.8.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.8.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.8.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "9bd35cc0fed109dedcb6e04ff7d162d2" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 20185088, + "records": [ + { + "name": "model.h.9.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 20185088, + "byteOffset": 0 + } + ], + "md5sum": "5f3ccfc709639df0a73db5ee3a960fb7" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 30902272, + "records": [ + { + "name": "model.h.8.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.8.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.8.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.8.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "model.h.9.ln.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.h.9.mlp.down_proj.q_weight", + "shape": [ + 3072, + 820 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10076160, + "byteOffset": 17037312 + }, + { + "name": "model.h.9.mlp.down_proj.q_scale", + "shape": [ + 3072, + 205 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1259520, + "byteOffset": 27113472 + }, + { + "name": "model.h.9.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2523136, + "byteOffset": 28372992 + }, + { + "name": "model.h.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 30896128 + } + ], + "md5sum": "ad1237b06fbd0528c9f8e23cf79a8891" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 27834368, + "records": [ + { + "name": "model.h.9.mixer.out_proj.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 0 + }, + { + "name": "model.h.9.mixer.out_proj.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 3784704 + }, + { + "name": "model.h.9.mixer.qkv_proj.q_weight", + "shape": [ + 9216, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 4257792 + }, + { + "name": "model.h.9.mixer.qkv_proj.q_scale", + "shape": [ + 9216, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 15611904 + }, + { + "name": "vision_embed_tokens.glb_GN", + "shape": [ + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17031168 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.embeddings.class_embedding", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 17039360 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.embeddings.patch_embedding.weight", + "shape": [ + 1024, + 3, + 14, + 14 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1204224, + "byteOffset": 17041408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.embeddings.position_embedding.weight", + "shape": [ + 577, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1181696, + "byteOffset": 18245632 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 19427328 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 19429376 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 19431424 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 19433472 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 19435520 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 19443712 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 27832320 + } + ], + "md5sum": "e9f4130276ed5535299d74f5e68e0597" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.0.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "12ccc3786467895471d31bb5b1d2610c" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.1.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "dcc2adf61aa07fe189bb15874110e845" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.10.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "9f8d49d8894a05f4179ba707925e9d59" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.11.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "19b6f5d2cfc839108f9156b8b521d3ce" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.12.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "3e6322c0673873144d67964c709fa3be" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.13.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "f04370015a40ae2073f5d89ad86f1c42" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.14.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "e2c99d455ee851ede30d282481166453" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.15.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "11c75f74757caccc61b9f6c3f39a9737" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.16.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "a7f57fd25793b85aa6027a514efc39fb" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.17.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "e13228942bc433e638fe3903001715f3" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.18.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "eab0a6fc1a1e3beac991a0be01fdefe8" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.19.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "afcfd0263f305fcc63e7b86393753bf7" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.2.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "460a8bc76edf812f85886ec9f950f395" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.20.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "bcac984764bba0a4eb9891d2043e1d89" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.21.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "3197e582d745d84c2ab4347a1f2d4e2d" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.22.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "e6d67ae9f4dab5ea49fac79bceac7a86" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.23.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "1a74510bf14d6fc06b53f09ee6a22fdd" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.3.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "75a9ee23e852e5562e2176a5ece04f24" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.4.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "da3f4d64dad38aa57a47fd6fbca1a334" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.5.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "ca1eb7481171d37cc9fab4b3432d81d1" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.6.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "03be4317a6855a15625866d5dde56763" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.7.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "7464cccd529310def6e3920c637ad62c" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25192448, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.8.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm1.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm1.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.layer_norm2.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc1.bias", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc1.weight", + "shape": [ + 4096, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc2.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 25190400 + } + ], + "md5sum": "8be48481c922751c9b723a99e5dcd727" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 26767360, + "records": [ + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.mlp.fc2.weight", + "shape": [ + 1024, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.k_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 8388608 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.k_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 8390656 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.out_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 10487808 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.out_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 10489856 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.q_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 12587008 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.q_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 12589056 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.v_proj.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 14686208 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.encoder.layers.9.self_attn.v_proj.weight", + "shape": [ + 1024, + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2097152, + "byteOffset": 14688256 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.post_layernorm.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16785408 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.post_layernorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16787456 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.pre_layrnorm.bias", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16789504 + }, + { + "name": "vision_embed_tokens.img_processor.vision_model.pre_layrnorm.weight", + "shape": [ + 1024 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2048, + "byteOffset": 16791552 + }, + { + "name": "vision_embed_tokens.img_projection.linear_1.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 16793600 + }, + { + "name": "vision_embed_tokens.img_projection.linear_1.q_weight", + "shape": [ + 3072, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5062656, + "byteOffset": 16799744 + }, + { + "name": "vision_embed_tokens.img_projection.linear_1.q_scale", + "shape": [ + 3072, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 632832, + "byteOffset": 21862400 + }, + { + "name": "vision_embed_tokens.img_projection.linear_2.bias", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 22495232 + }, + { + "name": "vision_embed_tokens.img_projection.linear_2.q_weight", + "shape": [ + 3072, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 22501376 + }, + { + "name": "vision_embed_tokens.img_projection.linear_2.q_scale", + "shape": [ + 3072, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 26286080 + }, + { + "name": "vision_embed_tokens.sub_GN", + "shape": [ + 1, + 1, + 1, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26759168 + } + ], + "md5sum": "91fb7eb9ed733756ed0c89a04c1a2657" + } + ] +} \ No newline at end of file