diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3494 @@ +{ + "metadata": { + "ParamSize": 310, + "ParamBytes": 1063128064.0, + "BitsPerParam": 4.785933230093276 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 466747392, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 151936, + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 466747392, + "byteOffset": 0 + } + ], + "md5sum": "368748610d7936326990a90582ced2b5" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 27495424, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 3072 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 5508096 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 6196224 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 17378304 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 18776064 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 18779136 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 18783232 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 20061184 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 20220928 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 21179392 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 21299200 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 21302272 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 26807296 + } + ], + "md5sum": "c08c532bc62310dca00a2bf910557caf" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 32481280, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 11182080 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 12579840 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 12582912 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 12587008 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 13864960 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 14024704 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 14983168 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 15102976 + }, + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 15106048 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 20611072 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 21299200 + } + ], + "md5sum": "3b6a90ea1d96a0dda0f5a779b24cc4b6" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31416320, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 1397760 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 1400832 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 1404928 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 2682880 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 2842624 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 3801088 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 3920896 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 3923968 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 9428992 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 10117120 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 21299200 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 22696960 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 22700032 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 22704128 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 23982080 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 24141824 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 25100288 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25220096 + }, + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 25223168 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30728192 + } + ], + "md5sum": "a534a20e533a7681269439b8e88e454b" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 32481280, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 11182080 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 12579840 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 12587008 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 13864960 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 14024704 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 14983168 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 15102976 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 15106048 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 20611072 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 21299200 + } + ], + "md5sum": "bb0ef100688b4a728c1f853e5768d755" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 31416320, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 0 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 1397760 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 1400832 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 1404928 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 2682880 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 2842624 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 3801088 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 3920896 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 3923968 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 9428992 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 10117120 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 21299200 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 22696960 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 22700032 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 22704128 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 23982080 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 24141824 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 25100288 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25220096 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 25223168 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30728192 + } + ], + "md5sum": "0013f22a1cb74ceb1e2baee1033c3056" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 32481280, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 11182080 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 12579840 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 12587008 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 13864960 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 14024704 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 14983168 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 15102976 + }, + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 15106048 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 20611072 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 21299200 + } + ], + "md5sum": "61cf8e053e833ba22dfaf92521260c33" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 31416320, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 1397760 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 1400832 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 1404928 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 2682880 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 2842624 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 3801088 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 3920896 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 3923968 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 9428992 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 10117120 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 21299200 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 22696960 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 22700032 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 22704128 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 23982080 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 24141824 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 25100288 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25220096 + }, + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 25223168 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30728192 + } + ], + "md5sum": "b4831648f469bdeded7047abfe235508" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 32481280, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 11182080 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 12579840 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 12587008 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 13864960 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 14024704 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 14983168 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 15102976 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 15106048 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 20611072 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 21299200 + } + ], + "md5sum": "3a0337f57a3f252b8dcf513e60ce7fca" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 31416320, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 1397760 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 1400832 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 1404928 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 2682880 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 2842624 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 3801088 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 3920896 + }, + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 3923968 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 9428992 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 10117120 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 21299200 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 22696960 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 22700032 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 22704128 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 23982080 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 24141824 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 25100288 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25220096 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 25223168 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30728192 + } + ], + "md5sum": "dd7e6bf77ced35f6109b25d44096e6d7" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 32481280, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 0 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 11182080 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 12579840 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 12582912 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 12587008 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 13864960 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 14024704 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 14983168 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 15102976 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 15106048 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 20611072 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 21299200 + } + ], + "md5sum": "f349b9bec9997a911a2e48559e6ac464" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 31416320, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 0 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 1397760 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 1400832 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 1404928 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 2682880 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 2842624 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 3801088 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 3920896 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 3923968 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 9428992 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 10117120 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 21299200 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 22696960 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 22700032 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 22704128 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 23982080 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 24141824 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 25100288 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25220096 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 25223168 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30728192 + } + ], + "md5sum": "a49064ffd13ccaa0b8fd82b256d7eadb" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 32481280, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 11182080 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 12579840 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 12587008 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 13864960 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 14024704 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 14983168 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 15102976 + }, + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 15106048 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 20611072 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 21299200 + } + ], + "md5sum": "46e71f8c84ce074146d7ba7771a80cbf" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 31416320, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 1397760 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 1400832 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 1404928 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 2682880 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 2842624 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 3801088 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 3920896 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 3923968 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 9428992 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 10117120 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 21299200 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 22696960 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 22700032 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 22704128 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 23982080 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 24141824 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 25100288 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25220096 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 25223168 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30728192 + } + ], + "md5sum": "a4dfde5374301c6d8994473a1a1c4d9c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 32481280, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 11182080 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 12579840 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 12587008 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 13864960 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 14024704 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 14983168 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 15102976 + }, + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 15106048 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 20611072 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 21299200 + } + ], + "md5sum": "4966114ac6c5e0cd000ebfec2b1a8033" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 31416320, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 1397760 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 1400832 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 1404928 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 2682880 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 2842624 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 3801088 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 3920896 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 3923968 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 9428992 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 10117120 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 21299200 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 22696960 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 22700032 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 22704128 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 23982080 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 24141824 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 25100288 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25220096 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 25223168 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30728192 + } + ], + "md5sum": "6e0cfd56239887e2a0662abb828fb4a4" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 32481280, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 11182080 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 12579840 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 12582912 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 12587008 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 13864960 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 14024704 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 14983168 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 15102976 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 15106048 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 20611072 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 21299200 + } + ], + "md5sum": "e150ff6e8052da88adea3439fa52daf9" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 31416320, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 1397760 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 1400832 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 1404928 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 2682880 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 2842624 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 3801088 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 3920896 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 3923968 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 9428992 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 10117120 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 21299200 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 22696960 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 22700032 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 22704128 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 23982080 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 24141824 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 25100288 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25220096 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 25223168 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 30728192 + } + ], + "md5sum": "1b0e2b7eeaa3b284676fef5a5c869a65" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 32481280, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 11182080 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 12579840 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 12582912 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 12587008 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 13864960 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 14024704 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 14983168 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 15102976 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 15106048 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 20611072 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 21299200 + } + ], + "md5sum": "67032fc20cf869f4cacc189f84b044aa" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 25223168, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 1397760 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 1400832 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 1404928 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 2682880 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 2842624 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 3801088 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 3920896 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 1536, + 896 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 5505024, + "byteOffset": 3923968 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 1536, + 224 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 688128, + "byteOffset": 9428992 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 17920, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11182080, + "byteOffset": 10117120 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 17920, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1397760, + "byteOffset": 21299200 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 22696960 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 22700032 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 2048, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 1277952, + "byteOffset": 22704128 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 2048, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 159744, + "byteOffset": 23982080 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 1536, + 156 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 958464, + "byteOffset": 24141824 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 1536, + 39 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 119808, + "byteOffset": 25100288 + }, + { + "name": "model.norm.weight", + "shape": [ + 1536 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3072, + "byteOffset": 25220096 + } + ], + "md5sum": "5df2483d3715bb6f76fcd01917e3bbb8" + } + ] +} \ No newline at end of file