|
{ |
|
"metadata": { |
|
"ParamSize": 451, |
|
"ParamBytes": 65527752704.0, |
|
"BitsPerParam": 11.661296738129801 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1557135360, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
152064, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1557135360, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ebab047815dc4a25bb4a12821329a31" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.63.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6142757cb6b949f8f7251628aa53e4f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.63.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "91faf2badaab41bdb1a119d100ec0e11" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1557135360, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
152064, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1557135360, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f32a4192cd8fa57457ae61588672cc91" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "abdab6316e1f0240a5c48f6934b7c803" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "382ff2b7c106176ea687a2d36e53743a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6be1bba2fa49ccb4609c3a1c92c0f894" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d4716d700845daf9fc4f8c28d19897f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "770243e89abb4649829aa8fcc9cf0c67" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7df3208af16bc2ae8863a9eec77be716" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "91c38c8695518da71cf3e46654085bc6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4e948f162104e91f173a206ed0de9b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f26d4e6e67bf3d40c2c50ea7678a7e7f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5fee2de2dd83ff1c6a6fdc087b1840a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b2060836044ee9f7885bebfe6e3da7a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c6e07eb974be53096b51603dfd06fff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c55ddfe5d12b81d7258b59b94e0bf992" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "785c497d374e811d009dc3d319449270" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "15fbccc5ccb57d0381ba4b0e5a1c651a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ca40b4e619ca1a7ac932e679f5e741c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "71b37c8dad20cb0a133533477b0754b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a426c453fde1e3336b1eb45f289ab00f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7107cf91ed4a0af103d08b8e81ae0894" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4653f0d314336ae44d02a078098f93d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "000807c8c442b1b1696900ca2a5f116b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "30f96456b2b6876144335b2003d13893" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "854cf5cfe00f7158daf90ded7b8ee20d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e2a3544e90ab15e2a0f569e90b379053" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1ee861a1dada0ea71da4218f039b68e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7275fac192b4cad69e5074c69d52aba4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23ce9daaf4e621477a729ee020e7518e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "12b8bec623afd3341bae1faa676e02ba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "610d70642569d95aa85f1492577ab4d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69ea0bfc4416eb502764acf912d8a11c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b8bce5b75c42ce1824ec9ea8057fcafa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9770dcf5bb59855d7fdd0ed4b27a8882" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "db41009da66906ca62c42ca9f24119bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "03e307e54107da0846ac526958e5665b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74e5234f7e3c3131ef4ceb18b8623b7f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5da39242e00ec8f0c8d12754d058e72b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b91e173eeffccfd061983aa4e32251e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "445be9c217cfc3311c2767fba30f483a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7fd10b234863a2a3f5f5d6f55c01e2b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e2d5c6b716ad99228ca267bb77ff0dfe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0bac228d75f853237b101da4e6ebb03e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ebcb979a17b06e6170bb7b732b0aae8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69319ee3bf80711c82664076292f1599" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e30d834c2f15c74ed98dd2015fd29e19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c63feb59b8feb980e602a40284fcf693" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b3029721555a18fd9a0fdfb8eb1b1db" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "454d8125c44eff2155573a21fc32afb5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "94c7b7d4cb975f169f88759ffd10c7e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69580377f1181157d97e210dc9df708e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f6ad7015f508cfa1c51ba48c188b42d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e2904f05ca0bd62197a1f0a4c8ad221d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b59cb145359a5f3c1360f84c26091cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1469d93d763ae166c9161b9e611d736a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "16e7595d9738fb616834867bfce50650" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d50a01c6c39712c5f5689e2e12ce56d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c470551d31f13b004b8df0fadf7b1082" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "546cebe8240473b9067d77b1e73f0d6a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9bb8a20a5e2c8a98860fc65324540c94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "45fc87f148f3b4ae7b6b75586da37025" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b4c0bf675a99c0cd0e8368815548ff3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ca521f51ba3d708113d817b95922aa6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d38948c44febbb26a677b15dc8b691bb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c5ada97fad88dd5e5f6159b02e4af9f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "188b8079cd8fa755f24c1126916c9fdc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26c1787fa658e73f3ff29e62da4ef5e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "05d78a9452b47c2282e3094f6808cb20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fc9a874e7cf6a81b1af4240b007cfa66" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "12be63e3ffbcb107d6a4321bbe471158" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "edd51b2f71391d694d08ac588fa0fe4b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8d1ea58909ab251bdaa5e6a47e9e5e7c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b2cbfb366f7d92fad6c5842e208b5e10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c621d64dd47ae8906bc02ae03151cb7f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4952d4c7d963ba963c88b67ef35fdeb9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c18ceafb27e183c674d42854be7a6f8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5cfd2120d0c42e087ea7ff446ef26e76" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5c38216022eda052326434dd748b132f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5c8a9ba9e81fc11cc4d58ec9ecf10c27" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fcc2b896d9e6be08ec775d4d47321f31" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e729b04b35469a70f432074077221419" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0d4177b685dd557f9ab6ac55613fcf2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "52c07274a578a32208fdec9bf74d588f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f74d437d00f777b0b33e70da165fcd6a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ece0f6862e393595eeb2196e42f4069" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7114cf51bfdf05b0b92524ecb3aee10d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fac02bf0dcb7572fb443b0d256d7cc56" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7972c8c587aeb14e4fbfdc9e32016f21" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "05d5c2ca8ff615dd0981e44bed495d15" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee72627e8c3ba622691ebc1f59dee36a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d471b5765e36a9b3e253cfd0b6260e7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4bbfd95cd338662510f285ae6e846af5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69f4848aef2979d706382b3c3e34d429" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c4d4dcbf6e321a0647be13366a86075" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "896a8920caaa46fdccd8b3c2952e1e3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "29e636180a9cadfb645c3ff141152e4c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "68543852537255de0f59460b3b5b320a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc0e894779991be6e3c1f82c31681a55" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08321ef408d9badb99c9045c1683b9d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "135a27d3a9626df0a13d070a6104e36c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d4a4158c02ad72cca6541654cc8960a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "47784749f7911a10c61a7d19b0bce23a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "583dc8ed6fb3b9f12eb8a157e6a31888" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "01d82109e2b053392c58c112837aa3e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d5c20b92279f030e665f87498574c552" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4616c54b751908ccb1ac86edaf98eb11" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b0580b4c326008dd79b4ed2a69b49eef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "076e5da95262790a5d3f1426bfb6cb1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d2f6104d4972749df63e01bbc8fa4626" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ed0af9be86b3f82693a098876ec4054" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1731442dbf7b6836eea60e7e5341c74" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a3a2769015eeda747c6dab404627649" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a03f74a04f9262079f9d4a494589d4f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f75c890b4feee824e3c706fec9e319ce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d5546f2cd028c025073bb0c62beb07ba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ab957e4cb6263347f7d3c207e3427c62" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "75f63149319fd2dee3bc4b1fa2fa9e00" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3fcc1802ee292e02a7af6e0768afc3ee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ec6f8aa47f6fc1e95c15f411a99ff467" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "93b2e92475ecb2964c181b8c522e072d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5c6b90cce7bcb06f02e5b66eb5d0cb4b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "604da35f9395a3d2d46afc1de505cffc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58463736c560a47373eaf9600bfdb157" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7a48ef08092fc550e5bda22fde150eb1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f19fe3de5ac81e68742b58d8495a2d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7d6a10c272a5df1b280352813835523" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3ca029383ab487142802f4042c7cf851" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7e98d663037efc91cd25ea1bda673a4a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "45ffdc357bf418534543301eef9b3614" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b6e399c0b6c67e00bd528b16e12b4ba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ea1ef05775ff2fc9eb91f2c32de3a145" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f797387bcca0e89d5a12ab5e6569a9d1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9e9731eee6323be581ed42eb04eec4b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f10cadffe8744c670137d0bd8cba5c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58032f77710c05aa28ebc0922a055641" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73d25a7990b064d75d3940f8c828da7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ea0b36a56939d2eded2d2ece611bc8fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "468de7fe1d77509417133e0d49d1c042" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d5600b51ab16792c6a937f4f641ba25" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "132779a872e73947380f72cf48fc06af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b528191419d940cf7fe619d5088ed136" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2dd22a63aef81eb91ab5dd0e3335e202" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d986048289c70a931cad927ffd7078c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c2d37cb2bd7eefe6ac17b98e0c990d20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2192ca6c2f5cc3023848c9a6576a1247" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "869b0c193e7916866f27f663ba3e9660" |
|
}, |
|
{ |
|
"dataPath": "params_shard_148.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9adbee946b8f33957ea9cfaf7a53be9f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_149.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ec7d72a81fdde034f3b413b16de40431" |
|
}, |
|
{ |
|
"dataPath": "params_shard_150.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a49d10483400832264db2a92069e6e62" |
|
}, |
|
{ |
|
"dataPath": "params_shard_151.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2592c76625234644b3964b28dc488f97" |
|
}, |
|
{ |
|
"dataPath": "params_shard_152.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e7f2978264a91a83c0ff74573f61edb9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_153.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "af23706921ecca51196c2c13c0e7c28d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_154.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5d86f0c588d4dd7bf4a123611e937b93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_155.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f620578b0a699d4d82592520f1f52950" |
|
}, |
|
{ |
|
"dataPath": "params_shard_156.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "842314b084d3dd62d928ded98968e224" |
|
}, |
|
{ |
|
"dataPath": "params_shard_157.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1dee23de3fa38e9aa1eedbde4a11fb46" |
|
}, |
|
{ |
|
"dataPath": "params_shard_158.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "59d1abc636058cb4135de1a0dbdab0ca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_159.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3a282bceb506182c3e9ae3fc7ff44a4e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_160.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c2cb53a6d26354ccc2c0c9773d3c3e26" |
|
}, |
|
{ |
|
"dataPath": "params_shard_161.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dc27334ca214721382370a88c5a9affc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_162.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b2be639cdca8d9d084145e185c95d7b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_163.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9492eba46a87dc48fae76324e919ff55" |
|
}, |
|
{ |
|
"dataPath": "params_shard_164.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4dc90d2c33953941d6a7fe2ec2cfd7e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_165.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "586efb33c244d2f305a84c0ebe6d7c32" |
|
}, |
|
{ |
|
"dataPath": "params_shard_166.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4eb6bf4f0a27244e3239739a29e8bf53" |
|
}, |
|
{ |
|
"dataPath": "params_shard_167.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "538d6c66016a52cbf34dd186dd1bbfa4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_168.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e326d4b86bd1443ef3e68d08a3346a3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_169.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b32f0d441ffc3b46bd09bb3d4650800f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_170.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ecca691dbbca4299299804138aacbe17" |
|
}, |
|
{ |
|
"dataPath": "params_shard_171.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ffebf087642c164f8be9aaef00589f47" |
|
}, |
|
{ |
|
"dataPath": "params_shard_172.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb48ee3a23058d83cb8b8ccbc56bba25" |
|
}, |
|
{ |
|
"dataPath": "params_shard_173.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "388345b13cd50c0200f13de5a43e3e9a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_174.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9b75d26352670a40f87247ee820dafc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_175.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ecfd9294bd47734f2902651909c6ab09" |
|
}, |
|
{ |
|
"dataPath": "params_shard_176.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5464b8e45a4b216e23a8e599a6246f00" |
|
}, |
|
{ |
|
"dataPath": "params_shard_177.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9595dbb3f9a60cc90539eabcabdcb914" |
|
}, |
|
{ |
|
"dataPath": "params_shard_178.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a63ce0ac904e74a0f732c069b763f16" |
|
}, |
|
{ |
|
"dataPath": "params_shard_179.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f310e2ff59128e9c169fc020668533dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_180.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4dbeedde5619d1fb6d11effdf24fde83" |
|
}, |
|
{ |
|
"dataPath": "params_shard_181.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4807437c8a9bed455068e281c4aeb899" |
|
}, |
|
{ |
|
"dataPath": "params_shard_182.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b1f9e05f9a849f05fdf91e05f34a85a2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_183.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "778870337876ee649f0f02ea787a5ea4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_184.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "64e3c421234fb6370434664d0e8ac2df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_185.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08cbd0bd9dc093fba6be11b6e211dd22" |
|
}, |
|
{ |
|
"dataPath": "params_shard_186.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d7b3d79282204719afaef5ad03fa5cc3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_187.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7589121c42cfea25580df5cb35dbe3f5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_188.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fac93ee37d4966f8f7bed7baa0d77628" |
|
}, |
|
{ |
|
"dataPath": "params_shard_189.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "87b7da44649bc6767a3323d17c3846ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_190.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "60516058dd9cf879b9ca8cda82223623" |
|
}, |
|
{ |
|
"dataPath": "params_shard_191.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "19807242e41bd91bbf3eec7191427a84" |
|
}, |
|
{ |
|
"dataPath": "params_shard_192.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "39603864d378d3039e8bd56b96b9ba3c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_193.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "19729fd254d74a6d6ebe7cccfb537607" |
|
}, |
|
{ |
|
"dataPath": "params_shard_194.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "32ef9c6d917e76bea36db52e89ca0e3a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_195.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c8c55042629e486107c3f5147beab130" |
|
}, |
|
{ |
|
"dataPath": "params_shard_196.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.48.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7082a8710212efa0919c70c5cd85e36b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_197.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.48.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ce012223592455e085628da5152a287" |
|
}, |
|
{ |
|
"dataPath": "params_shard_198.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.48.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5dfc357b6cbfac9e9931f26a7cec3653" |
|
}, |
|
{ |
|
"dataPath": "params_shard_199.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.48.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c39efd7684406799a10dc3b5cf43acc9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_200.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.49.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d590f2d9de53027aafbfbb438ff5d0de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_201.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.49.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "67f93d0184da1a9eee0988388e56b388" |
|
}, |
|
{ |
|
"dataPath": "params_shard_202.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.49.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e112a3cd9c9f15178d698fa982a9212e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_203.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.49.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a5c44176e9d36a912e3eef33394d5f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_204.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.50.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eb1f92c2f4d2df62e0e25ef202f6a71b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_205.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.50.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "25fd1c2497046ff741da909e549a8003" |
|
}, |
|
{ |
|
"dataPath": "params_shard_206.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.50.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a7968fb24c9e1efb26125c3cb1157687" |
|
}, |
|
{ |
|
"dataPath": "params_shard_207.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.50.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c90952a99572d8ea4c966f680ce5ea3a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_208.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.51.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3506ba9a7341df82ccebaa702e3e31b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_209.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.51.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb4cb4411f9699ba9edd42a14fe63dcc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_210.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.51.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d844ee9ead864f8e7ba3f7d79a2b067" |
|
}, |
|
{ |
|
"dataPath": "params_shard_211.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.51.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c5bd9fd77f1ee2a466538f6cccc4abb3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_212.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.52.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0a07afe99c02acc6623314f98e0e7042" |
|
}, |
|
{ |
|
"dataPath": "params_shard_213.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.52.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "727ded046641d0d7291ae1f6dd7cca15" |
|
}, |
|
{ |
|
"dataPath": "params_shard_214.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.52.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d2a6b8e3dab3fdaa3c3b94682ad971d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_215.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.52.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e69b713c7ab4cb9a0804aa5d8512a944" |
|
}, |
|
{ |
|
"dataPath": "params_shard_216.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.53.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7ec40c8bf46499301bba07fb92ce64f6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_217.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.53.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "458c5049fb5a98958bc2789fd8a4661b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_218.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.53.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "846c92564124f3de0da9c1fe9c222c9a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_219.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.53.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5a13bbc57d9fb9cedd7f8d58ef997cd3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_220.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.54.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08910347e75d55743bfba18270d4da5f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_221.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.54.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "94145c2ab896c5e42bdfa716a60d20bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_222.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.54.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "062b832f222716190f719186bed31161" |
|
}, |
|
{ |
|
"dataPath": "params_shard_223.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.54.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c45f9599200903b95b612ea2930ffd79" |
|
}, |
|
{ |
|
"dataPath": "params_shard_224.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.55.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c4b10136d11eef3df3941315dd57eb86" |
|
}, |
|
{ |
|
"dataPath": "params_shard_225.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.55.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b3a7f6c44bfaf5bd11100daa3935cf91" |
|
}, |
|
{ |
|
"dataPath": "params_shard_226.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.55.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "734d1983c20928b5c14074faa06b0eaf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_227.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.55.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "418af5d0f9eb4c38d241baca02f4f1a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_228.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.56.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c342fc790357827dfc56071aad6d0b11" |
|
}, |
|
{ |
|
"dataPath": "params_shard_229.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.56.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9f762474812f9ab39c37be73882ae17" |
|
}, |
|
{ |
|
"dataPath": "params_shard_230.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.56.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a33eeba8c40e0d6d0d14fc9f5f3180a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_231.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.56.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8da40d4c433458bb42e523c8ebc0febf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_232.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.57.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cfbd4d64c9591a96c1f791484331d9c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_233.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.57.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6501aad225757bd2858110ee07fa6d1d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_234.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.57.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "134d4486a8a90d47c5b84628347e7cc8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_235.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.57.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "490e63dd84e3e19fc59ef6b4fea67900" |
|
}, |
|
{ |
|
"dataPath": "params_shard_236.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.58.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "82d1aafa5fd2c2cde3816dc1cf158b5d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_237.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.58.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "81eaa978116ae4062ab46264a84fca05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_238.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.58.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b3e348383eb3dc470f594bd0f7ef9164" |
|
}, |
|
{ |
|
"dataPath": "params_shard_239.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.58.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d14fe54e17c9c10414b9f1e9bd1f525e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_240.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.59.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f308449b6c35588f8f4bf69d05f7d4cf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_241.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.59.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ce849d2b66bb5fbf05b17f37836041c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_242.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.59.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "988d3abb58e781a668859bcd8be3e0c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_243.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.59.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "848c8c3f55ca6846f5c19919b2d3c73e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_244.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.60.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4011187c2b3c9702ec28b7c22d500325" |
|
}, |
|
{ |
|
"dataPath": "params_shard_245.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.60.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1602243caa912687bd3a27f9ebfea73e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_246.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.60.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ba75f1772670be818f3941927a26f74" |
|
}, |
|
{ |
|
"dataPath": "params_shard_247.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.60.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee8d0b6f2a0c66c627f33e747692139d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_248.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.61.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b4bf9f0a00431e29f0a0959bf57047e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_249.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.61.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ac1f798741e90add03065eb1803b681" |
|
}, |
|
{ |
|
"dataPath": "params_shard_250.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.61.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef70bcd2d50ae806e11b4f1852e0fa41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_251.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.61.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5afd078c2166291b9e066a8240c50b60" |
|
}, |
|
{ |
|
"dataPath": "params_shard_252.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.62.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
27648 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78a19235c7baf9cac6a8da2cb2937a4f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_253.bin", |
|
"format": "raw-shard", |
|
"nbytes": 566231040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.62.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
55296, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 566231040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d78ce4c346fc113a819b37d2eafe4d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_254.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.62.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cfacd9675e9fae450c86cdb490ed189f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_255.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.62.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e1eb88ea9c0dc29197fd966114235f10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_256.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.63.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "193101b6973c37967f9be0ced8cce1a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_257.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.63.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ecfeee3d0547a155fe21b45f886697f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_258.bin", |
|
"format": "raw-shard", |
|
"nbytes": 2238464, |
|
"records": [ |
|
{ |
|
"name": "model.layers.63.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.63.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 10240 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 20480 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 30720 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 40960 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 51200 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 65536 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 75776 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 86016 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 100352 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 110592 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 120832 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 135168 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 149504 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 159744 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 169984 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 184320 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 194560 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 204800 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 219136 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 229376 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 239616 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 253952 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 268288 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 278528 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 288768 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 299008 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 309248 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 323584 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 333824 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 344064 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 354304 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 364544 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 378880 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 389120 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 399360 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 413696 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 423936 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 434176 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 448512 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 458752 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 468992 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 483328 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 497664 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 507904 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 518144 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 528384 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 538624 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 552960 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 563200 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 573440 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 587776 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 598016 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 608256 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 622592 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 632832 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 643072 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 657408 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 671744 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 681984 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 692224 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 702464 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 712704 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 727040 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 737280 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 747520 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 761856 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 772096 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 782336 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 796672 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 806912 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 817152 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 831488 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 845824 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 856064 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 866304 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 876544 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 886784 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 901120 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 911360 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 921600 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 935936 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 946176 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 956416 |
|
}, |
|
{ |
|
"name": "model.layers.32.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 970752 |
|
}, |
|
{ |
|
"name": "model.layers.32.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 980992 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 991232 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1005568 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1019904 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1030144 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1040384 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1050624 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1060864 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1075200 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1085440 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1095680 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1110016 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1120256 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1130496 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1144832 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1155072 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1165312 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1179648 |
|
}, |
|
{ |
|
"name": "model.layers.33.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1193984 |
|
}, |
|
{ |
|
"name": "model.layers.33.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1204224 |
|
}, |
|
{ |
|
"name": "model.layers.34.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1214464 |
|
}, |
|
{ |
|
"name": "model.layers.34.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1224704 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1234944 |
|
}, |
|
{ |
|
"name": "model.layers.35.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1249280 |
|
}, |
|
{ |
|
"name": "model.layers.35.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1259520 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1269760 |
|
}, |
|
{ |
|
"name": "model.layers.36.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1284096 |
|
}, |
|
{ |
|
"name": "model.layers.36.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1294336 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1304576 |
|
}, |
|
{ |
|
"name": "model.layers.37.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1318912 |
|
}, |
|
{ |
|
"name": "model.layers.37.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1329152 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1339392 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1353728 |
|
}, |
|
{ |
|
"name": "model.layers.38.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1368064 |
|
}, |
|
{ |
|
"name": "model.layers.38.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1378304 |
|
}, |
|
{ |
|
"name": "model.layers.39.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1388544 |
|
}, |
|
{ |
|
"name": "model.layers.39.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1398784 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1409024 |
|
}, |
|
{ |
|
"name": "model.layers.40.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1423360 |
|
}, |
|
{ |
|
"name": "model.layers.40.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1433600 |
|
}, |
|
{ |
|
"name": "model.layers.40.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1443840 |
|
}, |
|
{ |
|
"name": "model.layers.41.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1458176 |
|
}, |
|
{ |
|
"name": "model.layers.41.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1468416 |
|
}, |
|
{ |
|
"name": "model.layers.41.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1478656 |
|
}, |
|
{ |
|
"name": "model.layers.42.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1492992 |
|
}, |
|
{ |
|
"name": "model.layers.42.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1503232 |
|
}, |
|
{ |
|
"name": "model.layers.42.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1513472 |
|
}, |
|
{ |
|
"name": "model.layers.43.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1527808 |
|
}, |
|
{ |
|
"name": "model.layers.43.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1542144 |
|
}, |
|
{ |
|
"name": "model.layers.43.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1552384 |
|
}, |
|
{ |
|
"name": "model.layers.44.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1562624 |
|
}, |
|
{ |
|
"name": "model.layers.44.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1572864 |
|
}, |
|
{ |
|
"name": "model.layers.44.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1583104 |
|
}, |
|
{ |
|
"name": "model.layers.45.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1597440 |
|
}, |
|
{ |
|
"name": "model.layers.45.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1607680 |
|
}, |
|
{ |
|
"name": "model.layers.45.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1617920 |
|
}, |
|
{ |
|
"name": "model.layers.46.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1632256 |
|
}, |
|
{ |
|
"name": "model.layers.46.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1642496 |
|
}, |
|
{ |
|
"name": "model.layers.46.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1652736 |
|
}, |
|
{ |
|
"name": "model.layers.47.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1667072 |
|
}, |
|
{ |
|
"name": "model.layers.47.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1677312 |
|
}, |
|
{ |
|
"name": "model.layers.47.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1687552 |
|
}, |
|
{ |
|
"name": "model.layers.48.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1701888 |
|
}, |
|
{ |
|
"name": "model.layers.48.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1716224 |
|
}, |
|
{ |
|
"name": "model.layers.48.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1726464 |
|
}, |
|
{ |
|
"name": "model.layers.49.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1736704 |
|
}, |
|
{ |
|
"name": "model.layers.49.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1746944 |
|
}, |
|
{ |
|
"name": "model.layers.49.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1757184 |
|
}, |
|
{ |
|
"name": "model.layers.50.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1771520 |
|
}, |
|
{ |
|
"name": "model.layers.50.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1781760 |
|
}, |
|
{ |
|
"name": "model.layers.50.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1792000 |
|
}, |
|
{ |
|
"name": "model.layers.51.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1806336 |
|
}, |
|
{ |
|
"name": "model.layers.51.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1816576 |
|
}, |
|
{ |
|
"name": "model.layers.51.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1826816 |
|
}, |
|
{ |
|
"name": "model.layers.52.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1841152 |
|
}, |
|
{ |
|
"name": "model.layers.52.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1851392 |
|
}, |
|
{ |
|
"name": "model.layers.52.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1861632 |
|
}, |
|
{ |
|
"name": "model.layers.53.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1875968 |
|
}, |
|
{ |
|
"name": "model.layers.53.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1890304 |
|
}, |
|
{ |
|
"name": "model.layers.53.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1900544 |
|
}, |
|
{ |
|
"name": "model.layers.54.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1910784 |
|
}, |
|
{ |
|
"name": "model.layers.54.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1921024 |
|
}, |
|
{ |
|
"name": "model.layers.54.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1931264 |
|
}, |
|
{ |
|
"name": "model.layers.55.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1945600 |
|
}, |
|
{ |
|
"name": "model.layers.55.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1955840 |
|
}, |
|
{ |
|
"name": "model.layers.55.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1966080 |
|
}, |
|
{ |
|
"name": "model.layers.56.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1980416 |
|
}, |
|
{ |
|
"name": "model.layers.56.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1990656 |
|
}, |
|
{ |
|
"name": "model.layers.56.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2000896 |
|
}, |
|
{ |
|
"name": "model.layers.57.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2015232 |
|
}, |
|
{ |
|
"name": "model.layers.57.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2025472 |
|
}, |
|
{ |
|
"name": "model.layers.57.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2035712 |
|
}, |
|
{ |
|
"name": "model.layers.58.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2050048 |
|
}, |
|
{ |
|
"name": "model.layers.58.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2064384 |
|
}, |
|
{ |
|
"name": "model.layers.58.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2074624 |
|
}, |
|
{ |
|
"name": "model.layers.59.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2084864 |
|
}, |
|
{ |
|
"name": "model.layers.59.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2095104 |
|
}, |
|
{ |
|
"name": "model.layers.59.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2105344 |
|
}, |
|
{ |
|
"name": "model.layers.60.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2119680 |
|
}, |
|
{ |
|
"name": "model.layers.60.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2129920 |
|
}, |
|
{ |
|
"name": "model.layers.60.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2140160 |
|
}, |
|
{ |
|
"name": "model.layers.61.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2154496 |
|
}, |
|
{ |
|
"name": "model.layers.61.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2164736 |
|
}, |
|
{ |
|
"name": "model.layers.61.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2174976 |
|
}, |
|
{ |
|
"name": "model.layers.62.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2189312 |
|
}, |
|
{ |
|
"name": "model.layers.62.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 2199552 |
|
}, |
|
{ |
|
"name": "model.layers.62.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2209792 |
|
}, |
|
{ |
|
"name": "model.layers.63.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 2224128 |
|
} |
|
], |
|
"md5sum": "b0f5a80d026b3517cfd2a7df2fe6b1bf" |
|
} |
|
] |
|
} |