{ "metadata": { "ParamSize": 805, "ParamBytes": 31776318464.0, "BitsPerParam": 3.04023285660184 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 420679680, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 420679680, "byteOffset": 0 } ], "md5sum": "79cdbbd110de7ccddb1a2f1e20de7ac6" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 52584960, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52584960, "byteOffset": 0 } ], "md5sum": "3b30e9c6343f50a2014db07dfbe274f1" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a7b494a842b6e7d72a57b59255484045" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5afaa909ee6b8ce2418a868954ce8424" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 23560192, "records": [ { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 0 }, { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 16384 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23543808 } ], "md5sum": "41d0b8400d88b59a867d034358fcd411" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "059a7d85ec21ea071c8381af2bed14ac" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "ef39d92b6f5455fa49778bd3963bb8de" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "697e37f857f64b4d54cf5c6545a58c7a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "b40ffe847002075ab000d2a8cea5cba1" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 420679680, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 420679680, "byteOffset": 0 } ], "md5sum": "8b8a02a0c14245a32993c53a4df286b0" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 52584960, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52584960, "byteOffset": 0 } ], "md5sum": "8c3f7f6a3dadd8a3303d0c97575306d9" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b82f2cdf9161f01f3a257210e6dea0bb" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "9021d99e6f10c7498a85b258b0c9db1c" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "7523c5ce188328e68cca18b82b72f266" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "3391a2b732afdb68726847e610d4a1fc" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "6f8397f19dcf553a905a285aa02a5c4c" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "7b32bb3187133e9811f7869bd1620210" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5a9d015e8949959bac87ad362341081c" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "21d48abcf0a261bb44f041d194ee4b22" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "72574b86e684cce8814a63021574b434" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 3358720 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26869760 } ], "md5sum": "0b4b5789767ab69f93ab3bb991fd8d65" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "1d95b2b19abe8fade99cb5cf301e75d7" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5089b37b21725132bf244cf93afc5874" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "2146dee05b38ccf4a6915cbb3ae7721b" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "6431c5131a115c69d8687c340763e2ed" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "33eb6e733bd215ffadc0639f47f85473" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "d2cbdaa4be2069ef4e112323ed9919e3" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15138816 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 15155200 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26902528 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 26918912 } ], "md5sum": "caac8ef607ba128f493ef871b3d6c3d9" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7efda4188558a786382fd72c76eafcd1" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d7021b32dac33c8a58f74f24a19fd983" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "7abcbb5242ba1ced66655bb9cdf9a265" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "77d07962f836339f528b07673fdb6539" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "2c76c1f794e309ac1614ddff620f7fd3" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "00287df312930aaff8bc0afce7c7d992" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "574d306ed44157352a8a6ac450e732fd" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "6cb374c34d43b3bda41a3bcc20e91615" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "7d11b5bf2c7dfa2ea80ed04612f0d019" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "fa6abf918e2cba5baa602c47b37727ec" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "162120f64eac538bdbc18ed9d164aa38" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cf529e13d0a28768f15eb2d96a47be9a" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "673768d25d49f7cc3525e4e5f20bbb01" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "49dc3ac2600f045763b41e5b0451948a" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a75e4683bfe12c348c4e8f8f0ea0c2cc" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "8e87607706352f9f0c658f2f30b1bd22" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "54a06b6517666da3d7f271e0044d61f3" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "4f1050b26006ec257aecb49ade39f629" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "a4e0f6947b19f9b7219fa61272eea4ff" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "ab673a66b934a744f1d9efd50765507a" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "9b66573f29463afb9d8331a9705d1af2" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d613aeeaa93405e664ffa2a3cc588257" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a3d6ea4031e8e46f50fce6416dc207d8" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "35448ff6b1aa9513ed35e5b57dc0e19d" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3822c3342588492b7b412d62851cfade" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 3358720 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 7557120 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 10915840 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 10932224 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22679552 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "6d7993aa2ea490cd2856b677bd7729b3" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "3942b36b592b53da4ebc8abb695f3db4" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1ac979e0c2775869b9ca326522f9734e" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "4f60a10700340c096b4dead70d9b6268" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "53545cb46e48722d3567abc3a677df24" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "fa614df1d0324627f48c7f0c9fcb9687" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "48c9d5a3710bdcb9cf572e767d97710a" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a366c667027f4fc409215aebc4efcc31" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d25a823c473265e49522aab71be34c7a" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "c8ec37280ac85cc5c96be4dbca1d4b37" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "546dadc3565b34f60167de534790be83" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "9ecc7f3527689391882f6000e81c31b0" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "3e3e59e4a99e7a9806f655f704dc70c8" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "7d82284e2f0cf6c60406a6e02376bd1a" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "8b50484ccf2d837042b58a1eb259d90b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6c7b439e92d9e940919473f8ebe99753" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "bf119fb2787116fd582d9bba550bcd0b" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "3c4f41cb4f44a8267e3ca67f5ed62508" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "2714eb8a7458f6b74aff9552f572b4fb" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "4c915342c44dbe5ee9a38ac7095db674" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "6ea2d5d6db9bf65d390a0276a98633c9" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "dc560a16cc7946e22de920a8f6de703f" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2607cb04f50769c84154b4f7c45f6056" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "a5302ba67d3bbefa995039869e6a8f22" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "6b1ce68d4e9e4d85c7f4b38b5efe9ce8" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "44c0281c903b1712d7ae71dcf4947c98" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "cd01533c8b4721d8ff627059b7cc4b4b" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "bf1f7b509c2d2963ffac1487c37d3e34" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a0c08e392b1bb60286378ca288a735c4" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "45bbaea2c1f015acc7cf1321faf11b21" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "99bce1b9114cde1414fb0ae3f1094c60" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "be83a617003dc96cbf2053fbd050e00d" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "67dfb500296a6a256548d2fd1c47be54" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "80c71deda6c8d9b3d6e0c20e254fe920" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d0dd28446eb0199978718d3b3970857c" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "d76c78dd46d04d849e8d01e6b52f25e1" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c3d3e83889eef688383ee2202ad00e07" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "1cb20e1716bad3289753847f06da6d29" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "8ea3e0a0e37899be5bab0bdf7cbc57ff" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4aca62becbf6cc729d7e6aef0999a913" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "3e6f3f0e50b4d324495aab3160961083" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "4a2c70ef3a66bb61fc35f43a107c8d59" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "9b32f32e8b1d1fd719f5fadad7937ce4" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "fd1b9fce3abe778e9b654f015a91ef0f" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "6eac00b45db06328cafc7d6fcfafadd2" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "79893ca5618452bd49d6eb21e5c1a27b" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "3f0c693ce55ccdbf2c3d5ad2cb33a478" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "013a6ad49c2c89a770d0b71baaa5a202" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "918baa92d5735a0148edec7aa7ba8ac3" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "037c6f0beda4210e71295c6521d55dfc" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "6794aa484651ba1f514b8be9bf5f1704" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "875cbb2031a94ae1f853981809b8587f" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "ede4fc697012b793b478d8dbe77c4bf7" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d91bc729e1a85bb7dc6a6230c762aaf1" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "07831c160677febc293de24ebeaee1f5" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "83e87195b485e1813b80721ccdedc4d3" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4d909964566221a2a05ffb8e7fe01dc0" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cb62e3a4a4fb14ce35120e60019b85b2" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "51ffaf0b31cab2039f1e3ef69b426cbc" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "a5da1369a747d0223c2245787565b600" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "068270946e8e0b94c743b1f570ad0456" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "054a2a63f28c370a626ed2d5fc54764c" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "97a643f062521abc831f7cacfcc67d67" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "dabac5556e2d11b86e247eace4a23a77" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "4f6076424dfcbab6a1b97f4b9190eee4" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d26049e021ad899f06f6b85ce25a0226" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "a7a5fe64d267f4fb48a5d21b2665b76c" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "25ce498101097e8054a3b21a698e4141" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "ae744aa77b778893fd6665ae437fd6c7" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0a622fceb54960e5aa5fe935cf6352c9" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "574f2b7e69c232d661e0eece3fc0bb13" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "908d156e2e53561115ed30635cd3a1b2" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4da05985a10aab57721bddf5596a4421" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1ca84ba5dcde33f2d17b55da92835fd7" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5b18be04adfcfdaa0d8d4dbbb166de4a" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "d7f13270e21c825c9e0f18874aaafb81" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "172be0424d1897c86e4dea8d668f961b" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "dbc94bcf8183d7d9609e3c37e37bb1ca" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "acf5655337f0c0e05adef76409060404" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "0d3cca735c48a57939ced788634bbd28" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "42c76d7d4eac5d3dac12f2042071d753" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c4118a266c0cda1550d297d153749240" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "7b6e7d6eefcc9ea2818a9708b4f1da70" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "8f47b61f443463f7a0c27dc599d39b18" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "c44caceb4fe53ab301b92a19da3a386e" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4e1b264d9e67a68c7235b774bab88ef5" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "518b11bc9b5e5f482864a06b4a40fc16" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "39409c4c628752ace29be02806776f8e" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5007e02dff6e1ad4b78c33a5cd5c4c6b" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "36aa7935e6228ffcc35d90057d0160ab" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "f468ba4244fd3f70815035540e0510e7" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1f56ff72f84dbd031f690b4160bb3f68" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "8cab4529afddd38601da177c4f4836bd" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7b51246adf348ffd195531e806bcb534" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "4adc63be9ed1998130780a2864731a6b" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "10712d57bfc1ffa1dc4cfa4f64985ec7" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d5b8efc7319653852fb002ea580d1843" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "6f6cfb266b551435acd874e25ffbaa1a" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "bafc49958ff513d1356ffcd631b91293" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "42979570be595430e4b23fd5281a70f2" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c5303995cfed1bdfe7973a43d3ab1f9d" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1c3de56b47ae0f62ea38b7502f12b84e" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "a71c322be37a9a7886e193387affd261" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1794aa25f83b0fb9c5d819691b893ed1" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "1fea0f5c0b1c2b827f3c3e2a5ef5f758" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "4a9300e211ba14bb1e054b8c74d23730" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "9db2edfdbe4e0657fae707eb51f7e028" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "b07642c5024a2a657e97074ec458b8d8" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "eabee2f9ab92842183e87dc223d90009" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "30b53461342fbc2c797adff85f9366fc" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "73149fafdf03eb08ca1c8fb8e61eea64" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f0f4552979d86929e06df418e80ebd9c" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "b94d938d07caa89bb35894ea758630c3" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "af234ea2cbcea120b6837e1c12ca7092" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "eac5c2b117b8409e55076c9f1bdc74ce" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c80b53aacb20c5676ef7ec612607fe16" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f7b7da3d0f08043e70b99f315ad7b03f" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fca06c40ae2a3f1ea892fe980b5a530b" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5ef30eb53ef019e81caa284dc55ce9da" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d5cd403e3e382705f37a22fb175fcd32" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "ce04c8ca2f38d4e1395c88dd9e5d0c3c" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "bc22a3549a56bf22fe17f2004121e938" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "538c33199e314389ad8211ca3d80140d" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "8930f7d891c246fc119ae97597faaf14" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "699754b50406b68c05d15b1ff4bfe4ba" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "a01143ead721a2a30686497e7c2afeba" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "50ff92ad68f01b3434bfcd4a12f57f56" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "e0329e0d6b2b14385247f977f4a28c7b" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "40f84e7ff2af9bf9196de4a33e4cf801" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "5591299adfe8c138137e2c052ec17b8e" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "c62225c3f4c474fe464b02dfa8fc6540" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "ff71307d37e26c96b4b952a876bb475a" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "876aeec6869ebbed6d90d0737a73fa12" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "2103c34abcad05aeed4e8d3c245b10eb" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7218a22920352a90745ae74e4ca58da5" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "e97411b24eed3b70b47cbb31f536afb4" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "19fe866ac68693e567a5c4f0de6b68d0" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "e4ab2a94cc7c569fbd05e41468b319d4" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "83c65ba5244fea1ccea6451d10b5511e" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d5ee69673f7d8e1886cc2374b83e769c" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "69c836efc877ad6e92fa9df264a9303e" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "c2abbd1f851192a726a10cd63b18c3e2" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "56332ae2c9c1e14610d1f83fc28a19d7" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d401ad9e22a12d5f2abe86c5caa08cef" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "3b6b2358bce51c681365b2f4956c7c57" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "05ecf3beb0277c095808ef1a5943c136" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "241d92b247c245e7e0ac2da4f0d5bd62" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fdecd51d38dc83044fc47cb0ae0cb435" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "49d3307a595f781a81f34a272c655f12" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "06e3dd351f6ecfd40c646059f16021a9" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "3b2e0909542eb62c028e64172f46e279" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "62f0a6b3ac812f7d9455f448ffcb0b6f" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "358b1193d40d7c4d3f70245b12aa2129" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "4d6fafc77d655743d145ba393d881192" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "14d2e0bc76058a10dce186660e72e17e" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c42829898f93df85944c0fedf30997cc" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "04394ed5e3865d539ef72795043d224a" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "5f5c2d0fdc01b6f5f544901104e08695" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "97de6a18cf242be2a183b1ff794a7d5a" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "520c87a2811aa53d65f32381b0237f37" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "ab016179053ac29bc00f73d45ff98898" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2ebd840ac59090ff7da413d4cc3a84fa" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "5c7f6842cceeb439a08bbd4f77fc612f" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "6fa171fe6de3d2997c727a5fe88e1178" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3fa3c72e836f68c5d40fccb61bc0cf14" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "8c4496bfc94552d61c0d182fc084409a" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "957e51f3ad0776015c785be0bf79da16" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4ee1b4147bed836fcfd9bf492f345c68" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "f945bd179235defc00b62b5770f086fe" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "3ebb0c3a508d54fd0fb5044cf18c40da" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "0b98cde111da46f3a946380406313141" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fc013304ac4ace481b14d7c49e2966a2" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "2639b0e379aeb19d979f2fc43e853609" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5df595698a188d6ab3187c7f90236f72" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "71cc8b9df4656cf45616d9b750e23e4b" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "287e82dc565c2726d617afb009692160" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "44e54d00ef58276303e6bdbdb0375278" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1e254cc58c2a200e17353789bcb4fffc" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "d43bd645d9afb081fdd7f39b343af674" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "236c110f2f2840e78210550645b17576" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "9087b2e8d805b71c42102bb4f351161a" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19320832 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31068160 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "39a2c36063fc64ad5122c5034ec3e7f8" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "11f7de14aeb0c0579dd1bce561e0dd73" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "10f91ca3f51462a32795aacfdb8a3841" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "6194b469e0869bd15edbaa1e9c76ff19" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "95ba0bab1b63e8801a5ddf7ef3860617" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "fef775966e3548b7e7c5c7fbac9e6d1c" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "1a2f56bb4ebc0f8ffd1b175ddedb5473" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "21bea814718e73c0415edd2326a98784" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "522f98957f243ec7998a0cb9f08523bc" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "d9f770610a52fecb8a46e22c5cc30c7a" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "d7e309fa76463d77dc8e3435308773f7" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "ea487b1d07da4adab3160179c7014ad7" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "79d2389ff5c4124d2b9c2ae162513924" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "d48aefee9f7db4a9d198d96100fe85d9" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "63d23d1e98b618b29495c16ecf6288ee" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "74080dad057426f845e457bb34a7e5d9" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "955e06b79eebff3d644f0d7d3c27ba83" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 3358720 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 7557120 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 10915840 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 10932224 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22679552 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "2697843ee12f3473efac4131fc9b1e53" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f8dab8040bfd516e013a4683601e3f23" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "31f59ce0e1e3dd3c622f93279f6cb666" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "608552569e685977ae7f97a2e07ded78" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "7bc55de92185fc310ef82c6f93c2ae61" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "4436711989be1fceee06fba9b82412e7" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "17ae88f77e0743ce409bcb4b1ab8b3d2" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "5f591c9b73d52af3b6ed049aff80ee23" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4d461a1d95e1f66ce720e7c49ab9fb3f" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19320832 } ], "md5sum": "4d6cd28c9beeca0af8aa39606f21b8ef" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "d906f0718b295a0f77c4a1523e8c0e94" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "afa50c16d1a6086a5091008fb1892295" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "165c4ded8825dff48392a6345b80a570" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1685d8e93bbf9a8176fe9eb6c0f0e850" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "8195047858a3e08d93b79b626a7e15b0" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "9219787f636fc35fc28569063f0fda82" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5fa06e749aa3b4ce3f7c91e3b4c85dc9" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 22745088, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3375104 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3391488 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3407872 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15155200 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15171584 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19369984 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22728704 } ], "md5sum": "0f542d99cd9b653d4b0bf82a6b004f37" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "94fc05aea36b09d469becb3f13860218" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "84ce2d9c88615f93bdc206222cd3d5b2" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4d14fc90894dcae4e327bd2af70f762a" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "911716952cd15efcd81fa6057960e0d8" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7ef2cc2c8a3594af1aeb7b48128caeec" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "bdc63c6c35351c670810b5271c95d6cc" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "6904cbc7443e52e04a194a954de7725a" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "70385f3a6ee3c547b8d76c5aa591a83d" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "3ea65371824ccd24d321a104ea87cac3" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "cc021b1c69ef95f85ee3ba5f4e3ea2a8" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "543572eec04507cdb18a8db520f08dbc" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "11c32cce2adc64001006f4ac242863fb" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f7e59f9a27acefe6643d22ac6a3936b9" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "1f92881b093cbb5ce56ca19b30e426c8" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5cad703ba954c1e923829d564d09ebf1" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b211388796819b029ee29dcc23a98da4" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "788fb4a4a47432952f9e8dd713e687cb" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "ebc273ee4a6c850fcf4bbab13f7bedbc" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "6b9bf4c6296bee6705ed9edaf3290f82" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cf77dc8e8b511f833c53ed09bd17c169" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5c56984c66bba9ddce651a320787ae78" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "536d57e7c7b61009db0e62417efbb3a4" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "5a70f4ee59a22575ce0a203832cdabb9" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "579c9e05885e158ff88e9cd870b1962f" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "783c9338fa120e3f36a8be6d8d2dfc8a" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "cf159c5d60489a4d0db03a0330dd06a4" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "679abefffac9bb8e2eeda2695931c039" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "de51b0921287e8627e9737aca0852d14" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "babf2ac4eba0ebda0b3191bd24dd97a2" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f2698420f460893a1b14234263feb000" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cd8ea1ee642120f1f89d6427a974fa35" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "c49e69e9764786ce6ca9ec99aa222466" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "061c1e2c3e818091113b6a398159dd9a" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "517c78d92d193228f06361c2704b1d64" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "c7e0f8a48e3bfcc9af344746f1bec5a3" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "ab628697a59bb432d004bf83c99e30de" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "63c5db62a180d667c71bf4a6634d2b69" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1d5b4b9c67bab6aad9f6338d3d69e531" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "f0652c9d425680f694f8d11b13e40816" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "14de629b5da0b34389fe2cadbea949c0" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "7165cebe5c7ed88ee883df3ebaf4ed1e" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "b4e35b505385ae5daa99c28ceb6f6492" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "1aec315da1ffe96dbc4e95ae99c921c2" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "112e6607a463fb0e4d1d7ea378d889eb" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "63d8118088309b8ff7b50ca43193c60a" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "51980f9f99479fa3d1991e9d6e249201" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4e06aab8439948471b6e9702c817868c" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "0be825e789de71459c9d38fa8fac65bb" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "73eea1166bcb6376f5e12abc44551361" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "b63095e6f1509b2929c4792cd6d9a925" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "7b4a667fa3eed9f56e95d98bcc3a6bd0" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f8149f51eddd259d173242c02237a751" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1afebf0312fb1503d9bb7d4f35ed572c" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "8840d363d1fe9ec152c0c3c884678586" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "56a253f5052fe183c6cb287c7f991e81" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "757b668328ba63deb853d87cb0ffb311" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "8a5da53d1c1c18edb84b5556c8447824" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2639f2132d4be9946c2885ae1ab63c7b" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "eee87b51a4b468bf1fd02464fe09e411" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0063c32518767ed5b1d95fd5367ebe77" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "e5e0d0bee5a2d331524cc568c2e94a86" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "888bfac2bfb8eaca6c9c2bbace82537b" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5d51b6693dbdbecdf0109ece08125f6c" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "06fc54f807880bb21eec5b5be909c4f4" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "fed3599a9e0cd58a5a0ff652c019015c" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "4ca9de0ac832a539cbc11a474f510d3e" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e8783ff87570b118e00f7577d4821320" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "e8bab125bac7293dfa3829f76ca9ac83" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "5ac99e7af6c163ee9fd1c908fd5222e1" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 26927104, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26894336 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26910720 } ], "md5sum": "312dd9a20f74870378f85c9a691f266b" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "745de801519d0beb5f85e9c906464e90" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "2147cab5b36ff2efacc6628e380fe105" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "6c636b78f31152713800ce78637e7dfc" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "35c9d44d80887961910f32484ff44247" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "ba77d65afb818284e72c34a53a2e9c66" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7f3bba8acf23ba3d324797d3015a78df" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "0cfc916f4d513ccd4930cead36daaea4" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "cba0f9afe96dad1ead23599725c38837" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19337216 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31084544 } ], "md5sum": "a527a3a89ed1619865e88e9afbf38896" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "ca1a13cec5750139aa001d05069d9361" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e73492c4c6c7ad5a24205e4125b42923" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1e1511af563f8a4d20e4d4a1d4bc8630" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "23a1bee3401d40537b395fb673738802" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "6aa2f2d8cfdbd82b4776b6068f84f1a7" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "8aeead059ec0bc46d1d4382eb2721804" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7c8f2d25bfab6313cdf34a7bd91fc4cb" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 22712320, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 3358720 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 7557120 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 10915840 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 10932224 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22679552 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22695936 } ], "md5sum": "f036caf5164b8124ecf21205a13fb8f2" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "567e22d32bfc6d2c68bddcd9e2cc415c" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "8f3a708d56b035f193e6701848448a4c" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d0f06a13ca193e5686fa378293c3279e" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "66b3075a348c192c23c4e0241149af02" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "2d6077133a92d11f3b4b51527c13b0b5" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "17c1403ca9d36c4a1e2fc82db9af3009" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "21673c0591dc34e515c14995f7c84734" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "e5946d1d0bc1c73cbc045963f04f4d74" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19320832 } ], "md5sum": "d75bbf2ffa6fcf72d7162b2140512bc6" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "48d9bea1426872f792fd1cc8c051caad" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "23b4fcd1b7b99a184812687e93ed81d8" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f60a3350343f307f84c847a4b98916f8" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "549a917f37aa1fba5be5827eab355487" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "9c18363da8b41f9c513028429c651c23" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "eef554a964357b58b5b416cb3fa20449" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "30f0c74583d34cf5275e21a328d3230f" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 22745088, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3375104 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3391488 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3407872 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15155200 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15171584 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19369984 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22728704 } ], "md5sum": "e32dda050e123b799ea8a56c13d50e19" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "d43f98754798afa035c4ed1fc7269d85" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "c1da5ad42d1b271ea327c06b87227738" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "b65c1de0b7691c9cd9da137b142ca02e" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "28651aee1ee8091c18e7284bb2e16e66" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5da05ac00e46adb6b499554d02f85355" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "4f4687e51c1bb229a9183002d757a865" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "977ffbf711f0a7f1d67a422f23a05db5" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "01f6eaad3bda217d13720dcee9a5b87b" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "9932fdec007234c931eb2935836a1f3b" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "728c8eeb7eaffea8ecb137f822c81e08" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "1a788b7acb15cbec07145042f27b2fba" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "ede815323e680576a8d729ffd48b993d" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "73343b40700331490a292d7e7d3c8035" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f341d90460f8448a3723b6306e530f1a" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "15ba85d227658d4aee4817f478b577e5" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c8bd54a4f25343c79da5049089bc7dad" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "15776278ddf20a23c83a86bbc7486b6c" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e62c4b8d697f8873ac94f17658b1323a" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "5485b495a33a9356ea476068f9f795d6" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "5b9359e0a54e466b5909f31542f7f036" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "2846db55feb473e0dd945b74de3fd239" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "11cfc34fd57b477db7c2cdb45ad215b0" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "f16321c71f20019d772892cbaed106a1" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "78b8b00443ad4f31f67f997577e34fa5" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "cb755c8a9873d47a2bc2f0ec15f645d8" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "2b4dd5448dea8b14fe85a6b0945ddaff" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "9e96fda54e0654811127719cfc172a51" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "32851c03f419fb8f7a0c008c0787df68" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "85defc1b92290a22e98f3d0197e9cc5e" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "759432737f374e1a5f269696fd86f726" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "17c23e216f3e5b0ec782c6cec59b2f43" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "0030e1ec7cd4958bd2b9ddeb5c2e02dd" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "60e05c79cddecc3a39e72adbd8888d42" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "9bcbc7b2095532fc61af1b4fbf08b2da" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "47fd824b3de6eed43c6ad44a6ae6e85a" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "463402ad51c3cf01550169915021e10c" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "58d1f8220a4cb612f5883bc523a6c8fb" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "bf5db19ac9e7d12d022f2c7f5cafc36e" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "98f6d04e2edd7c37d8695e3fe253621b" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "7f065ea5725ba560e84934340a2b0a25" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "1f155c42228cfb49b985edbcb1d0da85" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "da782ab956192657acfee4aa18822252" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19304448 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19320832 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19337216 }, { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 19353600 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "a4e6af3b3e9f89b67fa10cf6a8587d0e" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 31068160, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 0 }, { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 4198400 } ], "md5sum": "2221d5851b96036ef00d092be251995c" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "c27aafdcb1fd510cd92400c88cf2b51f" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "63886943a345bc134f9022ea336d796c" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "f44b52cb74fa76468da62cec012e395b" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "f3530ed03f31e068a89db1db2f95e3a1" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "1a83e6700efabe396d339a91c79d5856" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "9b85f3a5f0e27093370681a394111ae0" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "12a0fd0cf6303d6c2b093636ef107f6e" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "2374a16faac055035792e859ec5d83fc" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "d2ccebd7916bcbe6c281ff38a01dc25d" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "a4d75e38e466bac0bd61d08ec8fe7cf2" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 30269440, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 0 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 3358720 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 3375104 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15122432 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 15138816 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 19337216 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 22695936 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 26894336 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 30253056 } ], "md5sum": "e8466b1ee854925f3f47edc4f69d1ec1" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7fce10fd2b99ee65baf900c9cf245edf" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "5bee1023b5f7b65e4f1f88abd1649041" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "704744da29c5075546d90229eee36f92" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "bd8feed5ee06472ac8ed0703bc4f061e" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "00a5d074b63a65296875e14ea6844d9c" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "dc087afb2c283f6ad74f548c5cf0e8d8" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 31117312, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31100928 } ], "md5sum": "2d20721605716c031658bca06d557721" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "0ad90a190f98943f7708505b5523d319" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "bb1b528a70b68292f3f22265765a2284" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "d81c21ea696779f6cf5e7583c3490086" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "73fe87191405725d19feac281c70fdb3" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "869e55962861fb8abfe9f23285ba353b" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "09e4b49f22e8a7b91fee83ebdc61608f" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "b594fd883e16dfe18df811d2307bb876" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 26894336, "records": [ { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11763712 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15962112 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 19320832 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 23519232 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26877952 } ], "md5sum": "6bdadf102248847734ba597bfe8c41b4" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "e1e4ec48b412f31c1fd0f74a92b7d3f0" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "909761c5b71d9b3eae6a056ce710483b" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "72dfdacf07339de7217de2512609e7dd" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 188088320, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 188088320, "byteOffset": 0 } ], "md5sum": "f547514098e4448601da1545ee4abcbc" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 23511040, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23511040, "byteOffset": 0 } ], "md5sum": "248431a4d54737d3054fa3eb04ac1d81" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "c5a1f3e7b594932d22d3d0d9bc15cd5f" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "b11ab96c461449a5bef8f81cf4895d6c" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 93978624, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 2868 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 93978624, "byteOffset": 0 } ], "md5sum": "7fb381d24a0d0a94eae345c1dc793fef" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 31100928, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11747328 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11763712 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 11780096 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23527424 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 23543808 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 27742208 } ], "md5sum": "0275bb6c6530aa37dd71952e4a224be5" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 33587200, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33587200, "byteOffset": 0 } ], "md5sum": "4992695c89fe0c277765c4d787fcfb4c" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 26869760, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 820 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 26869760, "byteOffset": 0 } ], "md5sum": "11aeafea7270233b3856538aba8478c6" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 19304448, "records": [ { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 717 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 11747328, "byteOffset": 0 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 10240, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4198400, "byteOffset": 11747328 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 205 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3358720, "byteOffset": 15945728 } ], "md5sum": "1fedb58739022a8929ff28d88dce4c6e" } ] }