{ "metadata": { "ParamSize": 325, "ParamBytes": 3790741504.0, "BitsPerParam": 4.500454373872803 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "lm_head.q_weight", "shape": [ 512, 32000 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "78a749d044adfaa06cee95edd0f5dee8" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 30744576, "records": [ { "name": "lm_head.q_scale", "shape": [ 128, 32000 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192000 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 8200192 } ], "md5sum": "15eefa6c04318939a79f27761ae6b13e" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "563dc84e2fa017475de201ab50ed1936" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dfbdfc09763ca355d75a9527def3e84c" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "35111861d53e97f148aae5208b102445" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "cad6b17fe6b5258dd607082b00a6fc1e" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "17c22051c7498d079bc63986332ee5b9" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "4ac9369eefc72d6f2c3d6ad68aad4419" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "fcd0c105412bd589fca871634c0209df" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "339c8a40026fba5b626e5fd1c84ca901" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "84e31378c54cf25868f66439aadb9490" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "3f3bff6dbf5bf06e3f7757985a20d403" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "88f28cfb3568e4bf4cdd8fb06f871845" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dfb36338b1a878bc925777ab79aeaf7e" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "8b0bb0b655c804c066ac6d1d92e91c1e" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "5e723c41d0755c27c56f869dba97aa97" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e6731d9bfcb76760facbbdab213ec8ac" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b9b4aa965f11b5aa309ca5b101bef87e" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b2362631b99eebd28245449e0c288488" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "afc6d30ec59d5f10ec3e9b8acbd12236" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ed189ce627c842e9a55125096932c62f" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "8a1e32a19d2b6d2d10999db679967837" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "3eea20958fe8c5bc0cd2981c756a2cd2" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8d3e0b0547938e33079c5b19e4b2cd0a" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "11ae85882834b557fecb89c4535017c9" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "71a6edd10d21e966abd621458bef2843" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "d52a07f14383e8c2a8b308edf7c5b100" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "48075b24e06c0039b41e7c5fb95a5bb0" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "c53d02bffa378fac42e722c7630eb426" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 65536000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 32000, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 65536000, "byteOffset": 0 } ], "md5sum": "0a7805c0b735f4876eb41cae01b0e911" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "749123fd7602c6b123819306791e8306" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "cd216c1098f4f6f87e0b2bd95778f8fb" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3fa8c67d4fdbfcc8389afde68aa4eb8b" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29253632, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32000, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192000, "byteOffset": 9445376 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 17637376 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 17645568 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 20463616 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 26099712 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 26107904 } ], "md5sum": "af089d9e270d4d8725b884517bd1d974" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "65b53ed7bcda391c5220a638bc9721f6" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "51d8de0b4315218af2d44e042d7aad0e" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1a19227b6a3da8b98728c874bc609b56" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "2e4f47f9daf438281a040e167e5cb3f0" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "af54dc05a0f3d1dda1617b29c7fb563f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fbf0d1ceb3700f60c6dc09171295921d" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "dd134d82db19013347dd15b9d9fc63aa" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "841f022c5ae47b975b8b16b5a60b1403" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a11f4dfeb91a6a801b0b7d365fe41b18" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8ca1447e8a470fc3101dec26b096d878" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "fc9b2036d9b782fab6aa11b0a953df6d" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "16724c89f1363a820ae2be1023a0c436" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d39da49ce0e393a520490fd8d418e5ee" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "bd7e72ef928f3f0296b443036abd40cf" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "8e2f5cbc652d7749e6a7d3c39bc7ab86" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fb84a4c108f0e524b6467ac2b8d5517f" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d4cccb51eea871bbd318d965f77f593c" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "12e2b072b1b94a113b9824a3367c9804" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "bd51c6578b1f26d96a0c77c9bf042016" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6f5ec28f62c7443dfe9964ecdd55babb" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "edf5abc692e99ec9545eca5ac437f152" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "0683e0786ecc4521fc6c40d39d9ab129" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5c0933dca7dbb5906eae4e4e3efc0b83" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b83d68a5341a48d6e9d02d3e320ad9e6" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "ff8bc696f914a57f242cc6419c2c28db" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "7e1855c2505bf4ebb399748981b7c707" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cc6ccc1346a8dc68baa88c46f0a3b35d" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "96377e98fa412f3077e6671c83f659e5" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "7f982e4b2285b7a3bf108badd475b9e5" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "32437ab684b15ac61216da632a9cc895" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "561b45869c0c3edc5dbf3e5c165bcce5" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "c26bde59eb5be7c82a1b8421c9849fb2" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "88a96034b4ed1b4ef0ad66bb9835c597" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d344decbb0c9086ea317067afbc9a06c" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "14fcf815afa306ce00d2143e6af556cb" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "4ef8628e635948371068603af70161fe" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "607e852b8d8066d874aa92760715f472" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "babc0d144fa80b4d734db8722e7562ec" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4e6534daeddfbc2ad4a59e4511b60352" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "4370ffe77e7dab62a69cacbefd5bfe69" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cc79b3e7e5e566c6f1fc4fad92cf5fc1" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "7cbf6609651f8fc31cf126483fa03956" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "1efc2ea6714a6405eac6d55e7816d253" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "638e8a8a3182a0f1bb8cb03bd43c4190" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "58ff936d9fb5e64966bdb0240a39f2d3" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "0f717193e2e0b042bfd69b3a7ad19411" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "b33ceed5c95a413de3590b3d57be4cb1" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9b84ceffe83a98c555ba66dc02c7d0ba" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "a0d867dab6cec06be77c18794c6d1c0d" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "6dfd6cab22ed22ac8ed1f9dd05d52fc1" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "4ad119f39644e166ae2ae2345c0197a4" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "32ea37656715b673f87d6fd73ee99b46" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "f9e2a4a537c4c206d0972325b2292a8a" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5fda937a6bd8c857ee77ba307faaf58b" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a7315c70f544b0ce785115dbb40fdbf3" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "d1bbb4e2dc4e876e9a80422fbb4051bc" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "549fb58d8da89f6337d9171af15c6f9f" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "e9db1027b1797c1ec358af6438c4c03e" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "716d61cc0c1182ae73d392effe9f2187" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "839d94d971a581cf0a17db58e1ddcbf9" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "31e00d5546eda244f787faa6aa5fb443" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2a422277e825cd84244699ed66b26e7b" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "1b7b67272b6c50f2a0cd66d958e46d23" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "61cc7ae67ebdc9c08c10e85385ffbeb3" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "c448af309825b0dbd0fb7adce4ac56c8" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "66e6028b8d72fcf2592e78014bb0ecb3" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "3a8e9aaafbe9fac760d98e0ea1ab3cd8" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a388f1a7362b3375574c8fd63b5c1087" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3bd7a6f63f72fda1bd84a24b92afbca3" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "ab462233c37b693d758802b384562448" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "d5b2e8a21b803c8a8c6efcf73023694d" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "af159f5d2a5dbf0292d4494fbbf50a97" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d893962727f50563245a587f96bb2c86" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "7d22da6078bc4ca452a2cdcd8462ef09" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "007ac04f3b00c86ea26c091a63902482" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1d58332121cad996e9f26e20783df3e6" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "0b539aeb01011e057fbdbf512e10e62c" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 1376, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "4a4d78b033f6e76de618d557c4d74ac0" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 512, 22016 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9184aec3df1c7720d4931cae24053fd3" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 512, 12288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "63c1711a4c5a8d500d19801ac612690e" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 21045248, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 344, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 128, 22016 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 128, 12288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 512, 4096 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 128, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19996672 } ], "md5sum": "0cc9e283b0da30c406c450e5ed7737d4" } ] }