diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3807 @@ +{ + "metadata": { + "ParamSize": 305, + "ParamBytes": 1801420800.0, + "BitsPerParam": 3.6099566223450714 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 62447616, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 308, + 50688 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 62447616, + "byteOffset": 0 + } + ], + "md5sum": "91c335fd9de370bd474fe8971d2a2fa7" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "1a7a84a5093664b9a1c8ca35abbbfead" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 23430144, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 77, + 50688 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7805952, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 7805952 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 7812096 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 19166208 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 20585472 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 23424000 + } + ], + "md5sum": "c829ae187d928c8b48e39596009e7cce" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "c6c74c228c91ce6abe35c6f3f9f9da92" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 12773376 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 16558080 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 17037312 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 28391424 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 29810688 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "e74a4ca2026632c8f90e46dce2f8c8e9" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "d9b4fdc07b2e81e600649e75bc2837cf" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 12773376 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 16558080 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 17037312 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 28391424 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 29810688 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "ee1d731a048076f7e502ca23847da72f" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "247dc6d313e701bad9f5a40093ee5459" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 12773376 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 16558080 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 17037312 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 28391424 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 29810688 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "1dde16ca8f3e4a64e63d97e41bda3d8b" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "75d25bcb8b6c9de07e6640c765a19aa5" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 12773376 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 16558080 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 17037312 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 28391424 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 29810688 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "47adc9d0a94328236765c3b2552f6412" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "8d0ed414a2e18c71e8665036e4350a4e" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 12773376 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 16558080 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 17037312 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 28391424 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 29810688 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "524da43ffad39581badfabeb37e86087" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "acd418c8917f2bf5157e56dbd7297045" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 12773376 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 16558080 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 17037312 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 28391424 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 29810688 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "a74cc899962bfb9d0eb026175fd370d4" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "8b385f6ceab9d746ec0e2099224f5fe0" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 12773376 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 16558080 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 17037312 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 28391424 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 29810688 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "dead80419776be7c48015b5364c00b68" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "1053a184e834b6bcb267a35c7a43bbad" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 12773376 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 16558080 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 17037312 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 28391424 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 29810688 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "7add06c05fa800ed49ac82f34e027f88" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "1a1b5808bc2f9f8171559100797b10cf" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 12773376 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 16558080 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 17037312 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 28391424 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 29810688 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "371d4a3a6dceaa7fb204b77b1ab8f158" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "716e310a586505c425143d9d58db2a94" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 12773376 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 16558080 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 17037312 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 28391424 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 29810688 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "071e0b44a5fa53b5edfb101907f9633a" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 62447616, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 50688, + 308 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 62447616, + "byteOffset": 0 + } + ], + "md5sum": "d14a46b199f1189aee934273737e3a81" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 24849408, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 12773376 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 16558080 + }, + { + "name": "model.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 17031168 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 50688, + 77 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 7805952, + "byteOffset": 17037312 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 24843264 + } + ], + "md5sum": "736b0fa55c9e70eafd94a03940c5482b" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "2b7f941b15567eeef2d9ede5c4031b0e" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "e1e7c88d1832d47cfa5c8f5fd7109004" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "f6f59476f93ff2f928e7629fb584a01a" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "9af159944093e4a4297ecabea4531016" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "62881db654be8d0a954afd57668dd68c" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "8860cbe57cf220746fd4c2e4b1165a7b" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "e0df0968c2aaf922b1ef9e933461a867" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "edf20728c8eaaff653d4c52b4cd4449e" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "ae911ec6a863b853ed5aecf730ef34d5" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "0191d20dd0ea644bad41d1e7e66d4121" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "f9140b6203a517950f4ff46ed3f3f66c" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "0ed73bdaef16f64e931ad0cf0f7d2429" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "ce9f0f5d0fb128447109e9e6b1da2604" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "b9d98ea36dde592f1e9d3d6761b26341" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "43357afd67911005f83964e7c6503b2c" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "b9d0a7e51b4d7b09435be5329915f081" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "d3c99dde42b549b9b082bfb5401ace53" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "42b4f795e3e7ac69e1d665e947ec5a7f" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "11eb8e84f58219b14a7784cdcd1e2ab0" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "d41cd731a0b9de85cc503bd6a5ab91be" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "2a73c311fd24218090e3515a35b927ea" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "608b8981886465a32ddefc97339e3059" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "76fede07f99ecbc57752b3d9dceed75e" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "6fbe20776fac4df5bad9bdcfb53f49e2" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "c0cff2f8bc0934840325164f3089cce4" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "55ba3949dc056e7dca9ed07decfb26ee" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "6eeeb756120f22ce577398706295b577" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "2dbf75c968192a58c56c4857032d1dcd" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "e6e383641f1c0e4854fbab2e2e9daea9" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "a1b2765eedc5f9cd9cb7df82b3aba1b8" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "a830f730ab5f6bd6c33a885623655769" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "ea20cc8d6cdee0d5c5c1e520e52871c1" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "d2f18d1c4860ad6074b9cf1a21a4c52d" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "5178372e80dcc99bddf5eea223683d39" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "01e6314cc10571b232370671faa108df" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 32655360, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32649216 + } + ], + "md5sum": "444e450aa5fd0e7dd9ddc73ec1b5d613" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 22708224, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 308, + 18432 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22708224, + "byteOffset": 0 + } + ], + "md5sum": "88b42f1cf5aa35e4d0d40ee86d295f70" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 32649216, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 924, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 231, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 11354112 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 77, + 18432 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2838528, + "byteOffset": 12773376 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 15611904 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 308, + 9216 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 11354112, + "byteOffset": 15618048 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 77, + 9216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1419264, + "byteOffset": 26972160 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 308, + 3072 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3784704, + "byteOffset": 28391424 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 77, + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 473088, + "byteOffset": 32176128 + } + ], + "md5sum": "c42365757247247c92b3472728babb3e" + } + ] +} \ No newline at end of file