diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,5695 @@ +{ + "metadata": { + "ParamSize": 405, + "ParamBytes": 7322025600.0, + "BitsPerParam": 4.500366420249213 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 81922560, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32001, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 81922560, + "byteOffset": 0 + } + ], + "md5sum": "0697be082ade47c89c5b8756c540aa80" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "ea08cbad1e6c3103161c091f1f30d8c5" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f1f632a036eabe2f053e2f1a561a77a8" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "15ad02a0af22890a5d7e134630c0cf6c" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b27614523da9cb11ed9c349e1843344f" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 27965760, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32001, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240320, + "byteOffset": 0 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 10240320 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 10250560 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 14674240 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 23521600 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 23531840 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 23542080 + } + ], + "md5sum": "c4eb4cfbb1db04f4481761b5e03a2236" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "79ae161f65b68d4da1b4b6ccfb755715" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 81922560, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32001, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 81922560, + "byteOffset": 0 + } + ], + "md5sum": "f5381192ea1d84f6ec947ab32ef3bc2e" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 28528640, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 8847360 + }, + { + "name": "model.layers.39.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 8857600 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 13772800 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 26880000 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28518400 + } + ], + "md5sum": "291861851b9d374a82bfc45c3c6d965e" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "39fd2a781bad2b0872aa90db35059b1c" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "56a7a0d485e97439de08ed4ef6c631f9" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "346c0f14ef88929c621c68fe77bc48f7" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 28447040, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32001, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240320, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 10240320 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 10250560 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 14674240 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 23521600 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 23531840 + } + ], + "md5sum": "c3263311c071315e0c3d631d17988d0e" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "05b056dc2d80c27a1cf14caac36fb8b8" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4fe1bb4c38bfb7db74d5e60c09e94b64" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1ae97e622089e43d5f6c628d2096e87b" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "246fd518301c75947884ee7c0d981c7f" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "b39c173e22a13255ef5b2a9ab7f8f11d" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e179e0e982fd70e7deea37c7df584a33" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "85eb307be85ad37d5a7fd0fdc45d0d8c" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "c7625a52780403fe8e2122122cb0eb19" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "e8b77b7860210e6891c8641080a3a6dd" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c419c3a5880899763e31f54f9161d0aa" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d7894501d32e92bbd7dfcb644ce6e3f8" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "bcb2adf6986ec8d498cc53bf3841037a" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c2320ddca85bd14ead40642fd9f95717" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cb0f69bf29e4aa92410ccb019fc9236e" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "30ada311d33535cf415407be7af785e6" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "7cfe423007e6a1f2c9325f1eb9fc0a35" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "dc64097ea4b506eee3a3e10dc303cc0b" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "06c318672f5c52ccb227438ea4a7092a" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d5f4e12c554b4c0b6bc33968de53cc29" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "9ad62daf087714d0d7d7020ee92b9fb5" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "c94e5c7419c4f57a2869b3c237724f49" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "90e2f4b85ea3ea08841ffba047c94c0a" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b529fdf61e303d8bfdc0f7b490d5136e" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "6ea93461d063e7730eff922a9d90fe2d" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "20d9b1013449e76245c6ff76263d73c5" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 32768000, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 14745600 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 19660800 + } + ], + "md5sum": "bda955923af273d583c89e289aae1b32" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "d1683baee779cde7c96ae2eccfb81607" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "69dc5a94899a6e9c4f6bde4ff4167797" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0e5f5db1f74fe4083b503d3d484e0b73" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 0 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1638400 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 1648640 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 6072320 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14919680 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 14929920 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 19845120 + } + ], + "md5sum": "1b1a581d6d06a06b01c3c6e02ee086a9" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "e0f73e5abf21b22eb0ab079deea7160b" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f743053431ee109bce9bd0c297629738" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d8253180a5f658d0537b339271f49862" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 0 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1638400 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 1648640 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 6072320 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14919680 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 14929920 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 19845120 + } + ], + "md5sum": "59fad1ba46dac4c2805567ab8d2fde38" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6924ac06a52ed715cafacac60f409707" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "84e004f1a9d7b95bd0c84afcd69bad65" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "278cf4f4eee9a689f0dd6725305446af" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 0 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1638400 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 1648640 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 6072320 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14919680 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 14929920 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 19845120 + } + ], + "md5sum": "4128fdb8055d6f9cc175518b34efad94" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "489a7b92ef43d57853fee00bbe6499ca" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "96ebc23847fcff0eab1e4010bc08856b" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "04697c479a597b01b7db6aef825409a7" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 0 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1638400 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 1648640 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 6072320 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14919680 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 14929920 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 19845120 + } + ], + "md5sum": "34795055d455e14122b5fc0830f18aa1" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "24612f760f51f9a02962096a1b5ac585" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9139483c55130bb2e562b3192dddf54d" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "3d0f9edffeb74f28b4491d1ba0338fcf" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 0 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 1638400 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 1648640 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 6072320 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14919680 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 14929920 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 19845120 + } + ], + "md5sum": "f14ee8eaa3f288158bfe2bb81c55886e" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "a6bcd4b092436cf08583a93aabee9891" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6d946796bf50770959f8e4ea94c857b2" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f3023afc8a3a07a479929c62a011cb36" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "039fbf22ffeed52fa0230a12e5df4d2f" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e28fd57833306613c2c02548a8a60941" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "5ef935cc304d3d983e570808b360b6a2" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 33136640, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 1638400 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 6553600 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 6563840 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 10987520 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19834880 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19845120 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19855360 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24279040 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 33126400 + } + ], + "md5sum": "143d47ff33cb1004cf2c46fff563f738" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "b53e7bdb718c71036f7c93c6581a439d" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9ab5432153e7e6c723714f6defa9d055" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4e0335167e559df99b0ef368b72836d0" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "47157067195730bdfc63166534cc188a" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "cc111c599aeedae3066f841d9afe9416" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "642309be72c45d1673f836d59b0ca7ef" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "40c37ee3c3d86b2020c82b13978b938b" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "3b278a92c387f843d131248bd5769ac6" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8ed7bccf2662633f899f3f01dd5809df" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "09fa659b74dc3b2bcec6a5156e079237" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "36f82618d49104c9524f5b254c53a76f" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "e7d4bd5f375d92cb62ba3de4dcbdda55" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "29872185176bb75fbab6c9bc7232af9f" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4c97217b84a36164e3d8eac10a627bd4" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "17b2b457dd5c300f140027d1095680ee" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "e9e3efd05fd6b9b8ed21a988a8f7f3ee" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b8a514895cf47a05711df414ffb64efe" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0e5dc24fe80aeaa8cd60b60ecb96abff" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "f028f8748c86c045aac5b5322362dbb8" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "9780a94bb5a2ea9780396078b7d0e8f1" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e91911a87a1de3e9cb019aeb848d7eb7" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "7658055645e10640a7676d8909cf82aa" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "b86b633ff3659cdea14cbc77f27aea21" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "0a926563c8019648eaafffa5061754e5" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cbde4d76896be9bceafe8ed0f344061f" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "bb19bf72e9ea3e7a4bf4cfbdf1738848" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "832db2f03b78eac87ff96b9827c2c26d" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "55a0af73a1e232e0b9c63c37e7e0b19a" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8b5deca84e77207ed3eb6829cd147c6c" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "9783d50e95c53b1c3c98f2ac0880e9f4" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "f0bfbafc86e602053d25424a168aed1a" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "bc145499fa54f6c55298560a66dc4098" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3ad6787b8005d1459ceec26701fb1133" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "3b51f3c83ce5bc773474ab8095044f86" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "c4c78a0070fcd1641d8bf0352c229994" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "6642c8f14b4492f103a72febd9d20586" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cb51f2206531d89eca30a280a36a44f3" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "a91a6e591d718ca83848c72ccaaa4025" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "d946626fecef31f3feab1a8bc1d7f163" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "daf48f1dfaedb510f0b3951b3ecb9528" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "642cb1a208862f1a8f900afba15bf00f" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "9a95dbb4c6033124d338ad19de773d2a" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "fa65f2c0a9024034f9384259935b3dbc" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "2238134f5a6237d8701749ab9aca188d" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ae687cd409881e0b5124d7a117bdeef5" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "8035a546e39fd9722ead12d6f40987e7" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "bc471d943e14e754b49140e600894c0e" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "10c1168920c0d67fa0febf8018461775" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "63bcec509e39e0a6c5969aa7a31e7bbd" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "0163ec35a99c148aec82a60a45ed9535" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "d47868912520b9cfccecd2b08382a9f5" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "8b603bf99bad65f6a426361a3649f2ab" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2b1d7d79c0756dfa3cf2c920ccef78ca" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "6351bc1ffc9bfbd19ac0615b160e1e8d" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "cfea80fae115b5b469e64b47ff0a5b5e" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "d249c81800e4eec1065575ac4b54d5ca" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "73bd3d9e13d28532dde52648dcb4bf4e" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "b3871c2432b0907b239d0d711f406b68" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "1e5c1b1ae6e005df9815c779f80d3135" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "08ff42782eb6e4d06b54593d553861a7" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "097fd1906f7e5d115b9f1ba475203021" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "518b07a6b59fb5732fddd5646b917e9f" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 19179520 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 28026880 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 28037120 + } + ], + "md5sum": "dd64061ea775f96168a05d960da9c6ab" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "aecff958903056b9b447da827e0471d8" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "5c08ea7bd23cde1551c4366f36994409" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 28508160, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 14745600 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 23592960 + } + ], + "md5sum": "b9ec4a384ca1be7530d65d26c34d92ad" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "aa3a33fb90fea26ab0cb9ceb0219895c" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "701c394425aaa31f7099a719406442f3" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0a926c5fa91cae678f8e1a0e326fd3b6" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "beed1156ca3d25b9c302dcd977c00d77" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 32481280, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 14755840 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19179520 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19189760 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19200000 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 23623680 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32471040 + } + ], + "md5sum": "970779970520be0ddba650a06c7afc16" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "5d826201fc0855b3a2365bb649c8e35d" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c2cb83e04f5c085144510ba8878f4514" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "424aab4902252052f792becde9b5fb13" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "c16f975b84b153c0aa9ac94a5599c509" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "afe667b6e095a1d273c985431741b5c2" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2d2c6753fc7b505020c75a42132fa8c8" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "3ae7d43a82f779c0fe5e17fd8d7418f5" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "209897b6000c73c376e6ce6c9b1012ed" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "b57271e7022c8f0b7a152f7f1f28a322" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "13c6e72052d299412f57860457243208" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "d1c325f4c8242000dcd3c8879554dc40" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "c7ad918b513cd8c66478f0ed6a3dceec" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "1224189b5a4e532d256c114d31de4d26" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0b925362e79771cde4e736bc7d2f0ddc" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "28ceb7b611bb837aedb46ab24e8ecbe6" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "dc95a00b596ca6bd4a034ce0a1d4da09" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "4c958b080916f330c95fc44d27ea05b8" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9a79de0e2c22e8bc33e5567e8b229936" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "1a06c6287966f546166384294bef00dd" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "4b7e16964d879369b7bded3c802220e2" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 35389440, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 5120, + 1728 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 35389440, + "byteOffset": 0 + } + ], + "md5sum": "bc1f18661cc735ab08fa27411a122836" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 27648, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f2413a49d3d191944fafa50c47c85b29" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "4f2b6fd3f946da76442a38c555692c78" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 32952320, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 19660800 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 5120, + 432 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4423680, + "byteOffset": 19671040 + }, + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 27648, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8847360, + "byteOffset": 24094720 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 10240, + "byteOffset": 32942080 + } + ], + "md5sum": "b8866ea4966818d1ff123f20cd37c7cf" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 39321600, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.q_weight", + "shape": [ + 15360, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 39321600, + "byteOffset": 0 + } + ], + "md5sum": "99fc6ee67b0dfd493a78767fd076309d" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 24576000, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 4915200 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 18022400 + }, + { + "name": "model.layers.38.self_attn.qkv_proj.q_scale", + "shape": [ + 15360, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 4915200, + "byteOffset": 19660800 + } + ], + "md5sum": "c62a14123c5b984ba3be02951ca8bda7" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 14745600, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1638400, + "byteOffset": 13107200 + } + ], + "md5sum": "1bc8400067ddb796ded6a1d6eba9ce9e" + } + ] +} \ No newline at end of file