{ "metadata": { "ParamSize": 1037, "ParamBytes": 1795483744.0, "BitsPerParam": 4.507976587172874 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "language_model.lm_head.linear.q_weight", "shape": [ 51200, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "aa7fb6d0a78b7735d1b2d7bf3c3f1286" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 32179040, "records": [ { "name": "language_model.lm_head.linear.bias", "shape": [ 51200 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 102400, "byteOffset": 0 }, { "name": "language_model.lm_head.linear.q_scale", "shape": [ 51200, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 102400 }, { "name": "language_model.lm_head.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8294400 }, { "name": "language_model.lm_head.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8299520 }, { "name": "multi_modal_projector.linear_1.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 8304640 }, { "name": "multi_modal_projector.linear_1.q_weight", "shape": [ 2560, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1474560, "byteOffset": 8309760 }, { "name": "multi_modal_projector.linear_1.q_scale", "shape": [ 2560, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 184320, "byteOffset": 9784320 }, { "name": "multi_modal_projector.linear_2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 9968640 }, { "name": "multi_modal_projector.linear_2.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 9973760 }, { "name": "multi_modal_projector.linear_2.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13250560 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13660160 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13662464 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13664768 }, { "name": "vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13667072 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 13669376 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 13677984 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 16157088 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16466976 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 16469280 }, { "name": "vision_tower.vision_model.encoder.layers.23.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 18957600 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19268640 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 19270944 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 19934496 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20017440 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 20019744 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 20683296 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20766240 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20768544 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20770848 }, { "name": "vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20773152 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 20775456 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 20784064 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 23263168 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23573056 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 23575360 }, { "name": "vision_tower.vision_model.encoder.layers.24.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 26063680 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26374720 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 26377024 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27040576 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27123520 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 27125824 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27789376 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27872320 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 27874624 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 28538176 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28621120 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 28623424 }, { "name": "vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 29286976 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29369920 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29372224 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29374528 }, { "name": "vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 29376832 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 29379136 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 29387744 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 31866848 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32176736 } ], "md5sum": "5adc3d3771e57553306ddcb8787e29ea" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "language_model.transformer.embd.q_weight", "shape": [ 51200, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "21a3f3014f0bb1e817c0d8fde5a20645" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 28783360, "records": [ { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.25.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 2488320 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2799360 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 2801664 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 3465216 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 3548160 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 3550464 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4214016 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4296960 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 4299264 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4962816 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5045760 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 5048064 }, { "name": "vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 5711616 }, { "name": "language_model.transformer.embd.q_scale", "shape": [ 51200, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 5794560 }, { "name": "language_model.transformer.h.0.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13986560 }, { "name": "language_model.transformer.h.0.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13991680 }, { "name": "language_model.transformer.h.0.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 13996800 }, { "name": "language_model.transformer.h.0.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14012160 }, { "name": "language_model.transformer.h.0.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 23842560 }, { "name": "language_model.transformer.h.0.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25071360 }, { "name": "language_model.transformer.h.0.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25076480 }, { "name": "language_model.transformer.h.0.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 28353280 }, { "name": "language_model.transformer.h.0.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 28762880 } ], "md5sum": "0bb01ed473f2eccc950bf7bef22fabf0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.0.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.0.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.0.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.0.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.0.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.1.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.1.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.1.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "cef4cbae358fd6f4dcb93ea442697859" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.1.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.1.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.1.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.1.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.1.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.1.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.1.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.1.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.1.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "37f61df831ef54acd43ffafd7f5f6812" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.1.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.1.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.2.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.2.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.2.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.2.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.2.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.2.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.2.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.2.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.2.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "a1c472c2ca759105e021ed55099de77d" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.2.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.2.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.2.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.2.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.2.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.3.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.3.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.3.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "26afd0de7b0b99c434a3246b45c5334f" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.3.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.3.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.3.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.3.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.3.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.3.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.3.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.3.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.3.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "1945d645f30822b081562de6ee94fc8c" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.3.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.3.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.4.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.4.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.4.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.4.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.4.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.4.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.4.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.4.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.4.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "ad4c1914abb36247a00e353c3a031efa" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.4.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.4.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.10.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.10.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.10.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.10.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.10.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.10.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.10.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.10.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.10.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "4f317d90555209c547b10c685e7911eb" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29511680, "records": [ { "name": "language_model.transformer.h.10.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.10.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.10.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.10.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.10.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.11.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.11.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.4.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29506560 } ], "md5sum": "e7cf867eee41783ef38734efb55c83f6" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.4.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.4.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.5.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.5.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.5.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.5.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.5.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.5.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.5.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.5.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.5.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "b12af01e6f14b6b435bc419b677722b4" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.5.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.5.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.5.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.5.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.5.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.6.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.6.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.6.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "400814140d5d15c8ff50cc259f2ecfd6" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.6.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.6.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.6.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.6.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.6.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.6.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.6.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.6.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.6.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "2a1b56d70a3eb9050c01b929f9e2833d" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.6.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.6.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.7.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.7.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.7.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.7.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.7.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.7.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.7.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.7.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.7.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "756c61c14754f1aaf76bbb5b63cb9cd3" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.7.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.7.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.7.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.7.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.7.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.8.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.8.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.8.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "43f2fec47b7045dd4665bb097141e3cc" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.8.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.8.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.8.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.8.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.8.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.8.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.8.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.8.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.8.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "2629378b5b3c5e10ed7b85a6b743997f" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.8.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.8.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.9.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.9.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.9.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.9.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.9.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.9.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.9.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.9.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.9.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "bf3482ff703c3aecf80abb45aa9d1f72" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29511680, "records": [ { "name": "language_model.transformer.h.9.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.9.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.9.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.9.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.9.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.11.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29496320 } ], "md5sum": "fb1399319bf97dea35a4e57bb78701ce" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.11.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.11.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.11.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.11.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.11.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.11.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.11.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.11.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.11.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "c79b84728f32c85c9f1934fca1a5e950" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.11.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.11.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.12.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.12.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.12.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.12.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.12.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.12.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.12.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.12.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.12.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "57466bcfa7866cc0c897b01aadcf1958" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.12.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.12.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.12.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.12.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.12.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.13.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.13.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.13.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "3340ca09c7e0fcf188cbc732170b8918" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.13.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.13.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.13.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.13.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.13.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.13.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.13.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.13.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.13.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "4c6b0b2d5459ccde1e2cefe733034624" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.13.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.13.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.14.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.14.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.14.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.14.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.14.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.14.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.14.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.14.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.14.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "f97b884e48179336a87a3f8a9d4b42d7" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.14.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.14.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.14.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.14.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.14.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.15.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.15.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.15.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "cfc35534fa6e014e29247779fbdfc3cf" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.15.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.15.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.15.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.15.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.15.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.15.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.15.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.15.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.15.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "fc44665f09555d0fa815a2b610b1eb17" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.15.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.15.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.16.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.16.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.16.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.16.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.16.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.16.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.16.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.16.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.16.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "10ca5436a4a4f4bce2c553afb1ba1fcc" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.16.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.16.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.16.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.16.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.16.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.17.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.17.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.17.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "4608a90e3da183fe93a5c5e8097295a6" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.17.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.17.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.17.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.17.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.17.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.17.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.17.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.17.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.17.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "fc6c063417c051a25e2131e352ec05cb" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.17.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.17.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.18.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.18.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.18.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.18.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.18.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.18.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.18.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.18.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.18.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "d18d81d1b421829cba6750eed528604d" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.18.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.18.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.18.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.18.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.18.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.19.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.19.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.19.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "429d2bf504e0300d1cc34a2bd22b095f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.19.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.19.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.19.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.19.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.19.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.19.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.19.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.19.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.19.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "6b595a9d205fc6fdfbe291b41b981623" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.19.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.19.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.20.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.20.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.20.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.20.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.20.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.20.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.20.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.20.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.20.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "667d122a145b61fcbd1a21a9b51bc673" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.20.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.20.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.20.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.20.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.20.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.21.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.21.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.21.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "76374ccdc28bc337c12d13965ff476fd" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.21.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.21.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.21.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.21.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.21.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.21.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.21.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.21.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.21.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "87e08dbc66040f6f5339eafcd7b19065" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.21.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.21.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.22.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.22.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.22.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.22.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.22.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.22.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.22.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.22.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.22.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "b11319085e0047274865af7284817cb1" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.22.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.22.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.22.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.22.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.22.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.23.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.23.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.23.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "098680097974538a2d0fc87b5504ee13" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.23.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.23.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.23.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.23.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.23.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.23.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.23.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.23.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.23.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "6b82e9eae4254946565c2f97a5f10228" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.23.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.23.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.24.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.24.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.24.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.24.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.24.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.24.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.24.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.24.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.24.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "f97d583766c4b7683792655a6ef53a72" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.24.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.24.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.24.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.24.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.24.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.25.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.25.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.25.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "ee08d68a817c47e054574d526f16b5a8" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.25.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.25.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.25.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.25.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.25.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.25.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.25.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.25.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.25.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "00dcd5a79c2c1b3bcc95ed17d2b0dbfd" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.25.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.25.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.26.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.26.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.26.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.26.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.26.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.26.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.26.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.26.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.26.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "8838f72f88f6233bb881bedc1f78212f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.26.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.26.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.26.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.26.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.26.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.27.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.27.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.27.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "a48c36b1931009c97168ff71bbd86dfb" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.27.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.27.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.27.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.27.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.27.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.27.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.27.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.27.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.27.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "7fff4fb6458a94cff85a64c844d1d800" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.27.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.27.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.28.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.28.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.28.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.28.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.28.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.28.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.28.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.28.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.28.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "d25c54d16ba8582de473776d9fafd0e3" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.28.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.28.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.28.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.28.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.28.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.29.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.29.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.29.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "842ebae2d06efcd1af97e2f0c288d230" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.29.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.29.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.29.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.29.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.29.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.29.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.29.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.29.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.29.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "9d54f89c063b089c8c722d4d893f5d72" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29542400, "records": [ { "name": "language_model.transformer.h.29.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.29.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.30.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.30.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.30.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 14755840 }, { "name": "language_model.transformer.h.30.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.30.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 24601600 }, { "name": "language_model.transformer.h.30.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 25830400 }, { "name": "language_model.transformer.h.30.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 25835520 }, { "name": "language_model.transformer.h.30.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 29112320 }, { "name": "language_model.transformer.h.30.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 29521920 } ], "md5sum": "baadd5c209ec076b336d2cae73703085" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.30.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.30.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "language_model.transformer.h.30.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 14745600 }, { "name": "language_model.transformer.h.30.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.30.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27857920 }, { "name": "language_model.transformer.h.31.ln.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29496320 }, { "name": "language_model.transformer.h.31.ln.weight", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29501440 }, { "name": "language_model.transformer.h.31.mixer.Wqkv.bias", "shape": [ 7680 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 15360, "byteOffset": 29506560 } ], "md5sum": "250ea1e41763e73ccfa9699fc0fde9e2" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29521920, "records": [ { "name": "language_model.transformer.h.31.mixer.Wqkv.q_weight", "shape": [ 7680, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 9830400, "byteOffset": 0 }, { "name": "language_model.transformer.h.31.mixer.Wqkv.q_scale", "shape": [ 7680, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1228800, "byteOffset": 9830400 }, { "name": "language_model.transformer.h.31.mixer.out_proj.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 11059200 }, { "name": "language_model.transformer.h.31.mixer.out_proj.q_weight", "shape": [ 2560, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 3276800, "byteOffset": 11064320 }, { "name": "language_model.transformer.h.31.mixer.out_proj.q_scale", "shape": [ 2560, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 14341120 }, { "name": "language_model.transformer.h.31.mlp.fc1.bias", "shape": [ 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 20480, "byteOffset": 14750720 }, { "name": "language_model.transformer.h.31.mlp.fc1.q_weight", "shape": [ 10240, 320 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 14771200 }, { "name": "language_model.transformer.h.31.mlp.fc1.q_scale", "shape": [ 10240, 80 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 27878400 }, { "name": "language_model.transformer.h.31.mlp.fc2.bias", "shape": [ 2560 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 29516800 } ], "md5sum": "8addd266b1afd006e7953553d9c3cb76" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 33454848, "records": [ { "name": "language_model.transformer.h.31.mlp.fc2.q_weight", "shape": [ 2560, 1280 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "language_model.transformer.h.31.mlp.fc2.q_scale", "shape": [ 2560, 320 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "vision_tower.vision_model.embeddings.patch_embedding.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14745600 }, { "name": "vision_tower.vision_model.embeddings.patch_embedding.weight", "shape": [ 1152, 3, 14, 14 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1354752, "byteOffset": 14747904 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_weight", "shape": [ 196, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 112896, "byteOffset": 16102656 }, { "name": "vision_tower.vision_model.embeddings.position_embedding.q_scale", "shape": [ 196, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14112, "byteOffset": 16215552 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16229664 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16231968 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16234272 }, { "name": "vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16236576 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 16238880 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 16247488 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 18726592 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19036480 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 19038784 }, { "name": "vision_tower.vision_model.encoder.layers.0.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 21527104 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 21838144 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 21840448 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 22504000 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22586944 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22589248 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 23252800 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23335744 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23338048 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24001600 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24084544 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 24086848 }, { "name": "vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24750400 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24833344 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24835648 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24837952 }, { "name": "vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24840256 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 24842560 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 24851168 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 27330272 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27640160 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 27642464 }, { "name": "vision_tower.vision_model.encoder.layers.1.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 30130784 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30441824 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 30444128 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 31107680 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31190624 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31192928 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 31856480 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31939424 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31941728 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32605280 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32688224 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 32690528 }, { "name": "vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 33354080 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33437024 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33439328 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33441632 }, { "name": "vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33443936 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 33446240 } ], "md5sum": "b4b54e1a9c3645832a5a4404392296cc" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 32901600, "records": [ { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 2479104 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2788992 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 2791296 }, { "name": "vision_tower.vision_model.encoder.layers.10.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 5279616 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5590656 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 5592960 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 6256512 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 6339456 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 6341760 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7005312 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7088256 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7090560 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7754112 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7837056 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7839360 }, { "name": "vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8502912 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8585856 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8588160 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8590464 }, { "name": "vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8592768 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 8595072 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 11082784 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11392672 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 11394976 }, { "name": "vision_tower.vision_model.encoder.layers.11.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 13883296 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14194336 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 14196640 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 14860192 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14943136 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 14945440 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 15608992 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15691936 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 15694240 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 16357792 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16440736 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16443040 }, { "name": "vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17106592 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17189536 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17191840 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17194144 }, { "name": "vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17196448 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 17198752 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 19686464 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19996352 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 19998656 }, { "name": "vision_tower.vision_model.encoder.layers.12.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 22486976 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22798016 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22800320 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 23463872 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23546816 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23549120 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24212672 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24295616 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 24297920 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24961472 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25044416 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25046720 }, { "name": "vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 25710272 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25793216 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25795520 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25797824 }, { "name": "vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25800128 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 25802432 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 28290144 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28600032 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 28602336 }, { "name": "vision_tower.vision_model.encoder.layers.13.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 31090656 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31401696 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31404000 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32067552 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32150496 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 32152800 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32816352 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32899296 } ], "md5sum": "ba17beee7dcff6649ea089918d143c55" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 32917120, "records": [ { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 663552 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 746496 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 748800 }, { "name": "vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 1412352 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1495296 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1497600 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1499904 }, { "name": "vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1502208 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 1504512 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 1513120 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 3992224 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4302112 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 4304416 }, { "name": "vision_tower.vision_model.encoder.layers.14.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 6792736 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7103776 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7106080 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7769632 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7852576 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7854880 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8518432 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 9267232 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9350176 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 9352480 }, { "name": "vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10016032 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10098976 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10101280 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10103584 }, { "name": "vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10105888 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 10108192 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 10116800 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 12595904 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12905792 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 12908096 }, { "name": "vision_tower.vision_model.encoder.layers.15.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 15396416 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15707456 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 15709760 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 16373312 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16456256 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16458560 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17122112 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17870912 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17953856 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17956160 }, { "name": "vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 18619712 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18702656 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18704960 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18707264 }, { "name": "vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18709568 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 18711872 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 18720480 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 21199584 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 21509472 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 21511776 }, { "name": "vision_tower.vision_model.encoder.layers.16.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 24000096 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24311136 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 24313440 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24976992 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25059936 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25062240 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 25725792 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25808736 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 26474592 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26557536 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 26559840 }, { "name": "vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27223392 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27306336 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27308640 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27310944 }, { "name": "vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27313248 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 27315552 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 27324160 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 29803264 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30113152 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 30115456 }, { "name": "vision_tower.vision_model.encoder.layers.17.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 32603776 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32914816 } ], "md5sum": "6e196d63f48895def405a7d43ad0e3bd" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 31613056, "records": [ { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 663552 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 746496 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 748800 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 1412352 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1495296 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 1497600 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 2161152 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2244096 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 2246400 }, { "name": "vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 2909952 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2992896 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2995200 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2997504 }, { "name": "vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2999808 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 3002112 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 3010720 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 5489824 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5799712 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 5802016 }, { "name": "vision_tower.vision_model.encoder.layers.18.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 8290336 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 9267232 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9350176 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 9352480 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10016032 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10098976 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 10101280 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10764832 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10847776 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 10850080 }, { "name": "vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 11513632 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11596576 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11598880 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11601184 }, { "name": "vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11603488 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 11605792 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 11614400 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 14093504 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14403392 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 14405696 }, { "name": "vision_tower.vision_model.encoder.layers.19.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 16894016 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17870912 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17953856 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17956160 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 18619712 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 18702656 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 18704960 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 19368512 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19451456 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 19453760 }, { "name": "vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 20117312 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20200256 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20202560 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20204864 }, { "name": "vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20207168 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 20209472 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 20218080 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 22697184 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23007072 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 23009376 }, { "name": "vision_tower.vision_model.encoder.layers.2.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 25497696 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25808736 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 26474592 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 26557536 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 26559840 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27223392 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27306336 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 27308640 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 27972192 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28055136 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 28057440 }, { "name": "vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 28720992 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28803936 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28806240 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28808544 }, { "name": "vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28810848 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 28813152 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 28821760 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 31300864 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31610752 } ], "md5sum": "d864ea3d1a8877f30ea4d857773d4765" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 33121024, "records": [ { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.20.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 2488320 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2799360 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 2801664 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 3465216 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 3548160 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 3550464 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4214016 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4296960 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 4299264 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 4962816 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5045760 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 5048064 }, { "name": "vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 5711616 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5794560 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5796864 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5799168 }, { "name": "vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5801472 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 5803776 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 5812384 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 8291488 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.21.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 11092000 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11403040 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 11405344 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 12068896 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12151840 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 12154144 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 12817696 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12900640 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 12902944 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 13566496 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 13649440 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 13651744 }, { "name": "vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 14315296 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14398240 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14400544 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14402848 }, { "name": "vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14405152 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 14407456 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 14416064 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 16895168 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.22.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 19695680 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20006720 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 20009024 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 20672576 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 20755520 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 20757824 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 21421376 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 21504320 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 21506624 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 22170176 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22253120 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22255424 }, { "name": "vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 22918976 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23001920 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23004224 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 23667776 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23750720 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23753024 }, { "name": "vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24416576 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24499520 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24501824 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24504128 }, { "name": "vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24506432 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 24508736 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 24517344 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 26996448 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 27306336 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 27308640 }, { "name": "vision_tower.vision_model.encoder.layers.3.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 29796960 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30108000 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 30110304 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 30773856 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 30856800 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 30859104 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 31522656 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31605600 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31607904 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32271456 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32354400 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 32356704 }, { "name": "vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 33020256 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33103200 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33105504 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33107808 }, { "name": "vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 33110112 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 33112416 } ], "md5sum": "0a29e36bbdabb1f50b2eae09a70a3c06" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 32901600, "records": [ { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 2479104 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 2788992 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 2791296 }, { "name": "vision_tower.vision_model.encoder.layers.4.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 5279616 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 5590656 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 5592960 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 6256512 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 6339456 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 6341760 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7005312 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7088256 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7090560 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7754112 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7837056 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7839360 }, { "name": "vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8502912 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8585856 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8588160 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8590464 }, { "name": "vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8592768 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 8595072 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 11082784 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 11392672 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 11394976 }, { "name": "vision_tower.vision_model.encoder.layers.5.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 13883296 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14194336 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 14196640 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 14860192 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 14943136 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 14945440 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 15608992 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15691936 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 15694240 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 16357792 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16440736 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16443040 }, { "name": "vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17106592 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17189536 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17191840 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17194144 }, { "name": "vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17196448 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 17198752 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 19686464 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 19996352 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 19998656 }, { "name": "vision_tower.vision_model.encoder.layers.6.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 22486976 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 22798016 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 22800320 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 23463872 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 23546816 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 23549120 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24212672 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 24295616 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 24297920 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 24961472 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25044416 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 25046720 }, { "name": "vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 25710272 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25793216 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25795520 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25797824 }, { "name": "vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 25800128 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 25802432 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 25811040 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 28290144 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 28600032 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 28602336 }, { "name": "vision_tower.vision_model.encoder.layers.7.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 31090656 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 31401696 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 31404000 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32067552 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32150496 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 32152800 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 32816352 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 32899296 } ], "md5sum": "73999b155901a73b57825b4a62bf5363" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 18702656, "records": [ { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 0 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 663552 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 746496 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 748800 }, { "name": "vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 1412352 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1495296 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1497600 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1499904 }, { "name": "vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 1502208 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 1504512 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 1513120 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 3992224 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 4302112 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 4304416 }, { "name": "vision_tower.vision_model.encoder.layers.8.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 6792736 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7103776 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7106080 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 7769632 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 7852576 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 7854880 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 8518432 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 8601376 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 8603680 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 9267232 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 9350176 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 9352480 }, { "name": "vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 10016032 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10098976 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10101280 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10103584 }, { "name": "vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 10105888 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "shape": [ 4304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8608, "byteOffset": 10108192 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_weight", "shape": [ 4304, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2479104, "byteOffset": 10116800 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc1.q_scale", "shape": [ 4304, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 309888, "byteOffset": 12595904 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 12905792 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_weight", "shape": [ 1152, 540 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2488320, "byteOffset": 12908096 }, { "name": "vision_tower.vision_model.encoder.layers.9.mlp.fc2.q_scale", "shape": [ 1152, 135 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 311040, "byteOffset": 15396416 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 15707456 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 15709760 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 16373312 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 16456256 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 16458560 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17122112 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17205056 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17207360 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 17870912 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "shape": [ 1152 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2304, "byteOffset": 17953856 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_weight", "shape": [ 1152, 144 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 663552, "byteOffset": 17956160 }, { "name": "vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.q_scale", "shape": [ 1152, 36 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 82944, "byteOffset": 18619712 } ], "md5sum": "8610acfcb3bf679640fdb61a35cf2b21" } ] }