{ "metadata": { "ParamSize": 65, "ParamBytes": 63323136.0, "BitsPerParam": 5.002662055470865 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 32629248, "records": [ { "name": "lm_head.q_weight", "shape": [ 32128, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12337152, "byteOffset": 0 }, { "name": "lm_head.q_scale", "shape": [ 32128, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1542144, "byteOffset": 12337152 }, { "name": "model.embed_tokens.q_weight", "shape": [ 32128, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12337152, "byteOffset": 13879296 }, { "name": "model.embed_tokens.q_scale", "shape": [ 32128, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1542144, "byteOffset": 26216448 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 27758592 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 27760128 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 768, 96 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 28939776 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 6144, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 29087232 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 6144, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 31446528 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 31741440 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 1280, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 31742976 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 1280, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 61440, "byteOffset": 32234496 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32295936 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 32590848 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 32627712 } ], "md5sum": "9d6b148e542ce32b3d03a9ab2f3061ed" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 24345600, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 768, 96 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 1179648 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 6144, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 1327104 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 6144, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 3686400 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 3981312 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 1280, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 3982848 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 1280, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 61440, "byteOffset": 4474368 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 4535808 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 4830720 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 4867584 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 4869120 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 768, 96 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 6048768 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 6144, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 6196224 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 6144, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 8555520 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 8850432 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 1280, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 8851968 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 1280, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 61440, "byteOffset": 9343488 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 9404928 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 9699840 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 9736704 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 9738240 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 768, 96 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 10917888 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 6144, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 11065344 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 6144, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 13424640 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 13719552 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 1280, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 13721088 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 1280, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 61440, "byteOffset": 14212608 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 14274048 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 14568960 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 14605824 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 14607360 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 768, 96 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 15787008 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 6144, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 15934464 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 6144, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 18293760 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 18588672 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 1280, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 18590208 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 1280, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 61440, "byteOffset": 19081728 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 19143168 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 19438080 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 19474944 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 768, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 1179648, "byteOffset": 19476480 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 768, 96 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 20656128 }, { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 6144, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 20803584 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 6144, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 23162880 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 23457792 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 1280, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 491520, "byteOffset": 23459328 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 1280, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 61440, "byteOffset": 23950848 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 768, 96 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 24012288 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 768, 24 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 36864, "byteOffset": 24307200 }, { "name": "model.norm.weight", "shape": [ 768 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1536, "byteOffset": 24344064 } ], "md5sum": "0de23251ab4684bf693a425bc20ab403" } ] }