diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4439 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3790741504.0, + "BitsPerParam": 4.500454373872803 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32000, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "8a73677d56fcbae5b449c5d44d65055b" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 30744576, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32000, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8192000 + }, + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 8200192 + } + ], + "md5sum": "f14f8fbc5e0e520af31c87ed16847b14" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "e114a5ec13e265f9246d24ff010793e8" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ab9e6cf8ded7b01af63496454cbf3797" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "7fc5aa64f9937917d21981c0656d95a9" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "1f7f354b69f83d867117f93c75ca9264" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0497f3e3e3c8f54e263c9c9d74deacbb" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "ad38ecf6470b14c3eddfcf6b8026fa9a" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "4cce513f54c40abf676c5ba80dfe35bf" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "74e0c73abca300f2d87b7db28e1fc4c9" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0daa4809ad558590426527187ca502c3" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "3b9dc0141455ca94860210a5c3c765c3" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "c544bace4d057a8da2df6ef9de5d9732" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b0a020f252abdd640710a487bd604606" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "2fba4cb3aea173202992090c78b77da5" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "1c4e12c3032fad4c5b81aa66671ae701" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "b7201a4a94c3f0590329cab49e3978fe" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6438df8045d49f0bf9b777bd4be6f1a1" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "4f9efc48974ff783548c6aa23463ba31" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "3c919a553f9d2e88dcd15e5a3fcf9b27" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7f425b60efd15a21f63bf87fab3452e7" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "e0f11a5e4fe259450ac1c9f3cc845d6c" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "61039348025279590bd8bdc3ea4bbca3" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "853653d56149a68bdd61a7c1b2ed120a" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "97a08ff62e8117cb831615b0f2c99334" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "fcbc3c435afc4c56a34a63797c62d081" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "b8fa859a2a93a38e14a763465c0245c0" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8bc24c5908fdefbc64fbca0cfaeddc51" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "c8874194773359ab2c73d6b8ae27d7d9" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32000, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "66ad602d220b664a12421ccdb3ccb5d6" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "db570748f3458eff8205503782b2b1ca" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5b23a18e2f1ce4e8e4783dfe4e056615" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d2cd8ae66289879284ebf3335f5bb3c2" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 29253632, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32000, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192000, + "byteOffset": 9445376 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 17637376 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 17645568 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 20463616 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 26099712 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 26107904 + } + ], + "md5sum": "235da66a83808a169e6dd0389ea628e0" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "387f5a48e8833545a9e6625174177147" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "6b9c9c046c5def1a3554bf23fd449715" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b0fbfdc8b07b911c27f1c77ac9f6b0c7" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "fe743495899c53ec9d39d7bdde54c9b8" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "1f40c42e671b86c077853c473315a1a7" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "dba942217d515737d3660f4bb42aa52e" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "96c2d2d457e245086fa9eef9eadaa08c" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "c74842e4663434d99c262c930e59fae7" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ec15fbc4ce31df5effad4b048749e190" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f150ccdc57f9e011c1f43a5811166279" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "178f72f719c24f633d506d62966df563" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "4548c2c7a33661a30d88cb3c71f08ca9" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f6202141e5fef174d48c21950b49607b" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "f7a658290f79a3f4c444ef0f7731ad60" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "ce60a1da15fa399406ce71250609be42" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "a22608ec842dedfcdd1ce9ec980229b3" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "97aeea95f7e75a2dc63af7a4fbfaa038" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "c5c905105dcf35af1035ffcbc21cd02b" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "170ecd72ddce28ccbc02cb87da9742c0" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1be45451863d797bbc5c026fecd9089e" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "b2613b0b9c3cfc429c06185fd8a80bcb" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "e431f95e1faf8969acd6eb48165c0e8f" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "e6431be7c454f2d153235f46042ed3cb" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "018d5b6a87c6753407119a41441fb7db" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "4b4a23a01bdea42dd56e2928a8649fa3" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5cef3b5abb12c2c0033ba506083b5cb0" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e918bc270a74419195f80ef44cf538b9" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "646b8fc1e9c76c3421bb9ff983381887" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "28272a97757c2f6ce33c67f671792dd7" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "0374db0f9d986c3865f0e1127bd7c4e7" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b9e5895348f211118ec2aef0f5480ac4" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "5fbc44603a2962b74c1fd9aa7a6483bd" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "6758eaa67d50d176ccc7fd6e04066dca" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "dc41a539f6006ee401283004ecf2a757" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "de21b6b74453f601369d39eab11a83f6" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "402727f31bed4f7aa0fe12bc1e06cebe" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "3b8844826937bc6b8b4d605f24803fc1" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b75c7a160d72673da5a907ff2420a804" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "0f20b2d3a5aaf6289d520ce16bf9e429" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "84ba93be30d0b3da934777fa0a04563e" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d2b856be8da472ea80accc7b80dccabe" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "112169ed477082383321512048e655c4" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "3947df68c3dc37b5591fa56f3480b4f2" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "9d14761cebfc8b928830327a1a8d641b" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "047781efb723631cea11872f8b0ac5b2" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "70cc93ad771010ba4db590fa2d316623" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8a0d2e3aeaccabf4e369cae36f9de1d0" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7ecc5b846c796bd8a9df5166e48da9b3" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "88bb1353b8146da768d06f353f7ddc22" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "cdf20e93f4ed2607b08aa7f10ce37db9" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "a5d40ab85a9286393c1dd0256e97fcff" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7563103ed726dbe49aac60e7e7adf378" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "269d28a86479a0ddf7b11e333f98da8e" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "7136bc14c72dab36849ba7a77ca6bd5f" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "888c0fe959f4008e9776b2e4f322c102" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "616984218cfd4187b02de2a35dc1ea9d" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "798edf7cfd53f938c607d8a0fee4dcdd" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "46023b82d0acf71b699af98d634f186a" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "83b0fcdcf18901aaa79b2f757b7a9b65" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "52f62b26b649e98d13d39430e4c22c08" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ffef0af806eb5ad195dff005e4c7a0db" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ec602aa527e74344a25da011b0524c77" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "cbb74df2ec60f4455861bfef2a6bd2c1" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "8da13b26890594049e53e891964ebaf6" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "30ea4f0ec0c48665c28916918848d70b" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "fa3792e8b94c87b01c76302aed717580" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "43909cfbdbf6b8e73345fe75cb27a6f3" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "c9c041914bc226b3d3413cc55c1ea515" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "66c4f85f749681350067688e08025cac" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "016d2b77848a71aa44b5d8c60b56bc24" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "741e888ce1bc10cf4466fcba8b154c0e" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "80e9b846b3cbde0a81d98c36a42e5893" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c0ecc4a20aea308f088fc5013e77551a" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "0415b228d5aaf08a49fe5f5e94df1be9" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8db1080b2975bda71434a1f350b20cec" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e8f45b4fc47b73331e4aeaa1f7312e26" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "40cb85109b1aca9ccec8f728f528b163" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "effd6e644e946a2b06324cded63cf5ac" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "3d189c21a66d98cfc21202d349a785dd" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b1148de679b33c93a3da0f54b847b6b9" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 21045248, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "raw", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 19996672 + } + ], + "md5sum": "e8a9b0e9cd8ff6c60f282547242cb49e" + } + ] +} \ No newline at end of file