|
{ |
|
"metadata": { |
|
"ParamSize": 31, |
|
"ParamBytes": 4044459008.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1089994752, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
152064, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1089994752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "04271b91cafad82f630343dcc1156d75" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23278f4c2c2bd26e27c57bf98ad4ef8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eab8149d005afce177602e3a0502e2d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d8c5636ba8c54a1bf9a9be52028edd24" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c03154c2e7e28390bd1554a72a582034" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d46398b81504bd2d82830108afdd8407" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "92a7bea5fcddf575091c43b1c4df714c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ccb811f73867d1f386520f524f2c01a2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8af6d905b41911171901c0c9b1309afb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "126e17f7b67d299092e34621d3d01ba5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "56b5611e7e548cbe8da292871a122b53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd79708597c0102a78c1c7487114aafe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "10c68f4a89d8c9e5ac3fd00b5ce2ee47" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e655468aacdcefbc5172993aea2634e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce9430ea30508f4c11646d9f6987c7d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8463225b5d39d8aa6befc7e394ff9a80" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1089994752, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
152064, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1089994752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "edc0c39312d7ac18516c511b41a46f02" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33131520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33030144 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33039360 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33046528 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33053696 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33062912 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33070080 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33077248 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33086464 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33093632 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33100800 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33110016 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33117184 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33124352 |
|
} |
|
], |
|
"md5sum": "70fbdf29d3964389bcae6e3d22e00b4c" |
|
} |
|
] |
|
} |