CharlieFRuan's picture
Upload folder using huggingface_hub
167e4e4 verified
{
"metadata": {
"ParamSize": 451,
"ParamBytes": 65527752704.0,
"BitsPerParam": 11.661296738129801
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1557135360,
"records": [
{
"name": "lm_head.weight",
"shape": [
152064,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1557135360,
"byteOffset": 0
}
],
"md5sum": "4ebab047815dc4a25bb4a12821329a31"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.63.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "6142757cb6b949f8f7251628aa53e4f3"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.63.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "91faf2badaab41bdb1a119d100ec0e11"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 1557135360,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
152064,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1557135360,
"byteOffset": 0
}
],
"md5sum": "f32a4192cd8fa57457ae61588672cc91"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "abdab6316e1f0240a5c48f6934b7c803"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "382ff2b7c106176ea687a2d36e53743a"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6be1bba2fa49ccb4609c3a1c92c0f894"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9d4716d700845daf9fc4f8c28d19897f"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "770243e89abb4649829aa8fcc9cf0c67"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "7df3208af16bc2ae8863a9eec77be716"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "91c38c8695518da71cf3e46654085bc6"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e4e948f162104e91f173a206ed0de9b3"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "f26d4e6e67bf3d40c2c50ea7678a7e7f"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "5fee2de2dd83ff1c6a6fdc087b1840a1"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b2060836044ee9f7885bebfe6e3da7a6"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3c6e07eb974be53096b51603dfd06fff"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "c55ddfe5d12b81d7258b59b94e0bf992"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "785c497d374e811d009dc3d319449270"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "15fbccc5ccb57d0381ba4b0e5a1c651a"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "5ca40b4e619ca1a7ac932e679f5e741c"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "71b37c8dad20cb0a133533477b0754b7"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a426c453fde1e3336b1eb45f289ab00f"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7107cf91ed4a0af103d08b8e81ae0894"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "4653f0d314336ae44d02a078098f93d8"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "000807c8c442b1b1696900ca2a5f116b"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "30f96456b2b6876144335b2003d13893"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "854cf5cfe00f7158daf90ded7b8ee20d"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "e2a3544e90ab15e2a0f569e90b379053"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "f1ee861a1dada0ea71da4218f039b68e"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7275fac192b4cad69e5074c69d52aba4"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "23ce9daaf4e621477a729ee020e7518e"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "12b8bec623afd3341bae1faa676e02ba"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "610d70642569d95aa85f1492577ab4d7"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "69ea0bfc4416eb502764acf912d8a11c"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "b8bce5b75c42ce1824ec9ea8057fcafa"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "9770dcf5bb59855d7fdd0ed4b27a8882"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "db41009da66906ca62c42ca9f24119bf"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "03e307e54107da0846ac526958e5665b"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "74e5234f7e3c3131ef4ceb18b8623b7f"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5da39242e00ec8f0c8d12754d058e72b"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "b91e173eeffccfd061983aa4e32251e5"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "445be9c217cfc3311c2767fba30f483a"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "7fd10b234863a2a3f5f5d6f55c01e2b6"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e2d5c6b716ad99228ca267bb77ff0dfe"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "0bac228d75f853237b101da4e6ebb03e"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "ebcb979a17b06e6170bb7b732b0aae8c"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "69319ee3bf80711c82664076292f1599"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e30d834c2f15c74ed98dd2015fd29e19"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c63feb59b8feb980e602a40284fcf693"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "1b3029721555a18fd9a0fdfb8eb1b1db"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "454d8125c44eff2155573a21fc32afb5"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "94c7b7d4cb975f169f88759ffd10c7e0"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "69580377f1181157d97e210dc9df708e"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "f6ad7015f508cfa1c51ba48c188b42d8"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "e2904f05ca0bd62197a1f0a4c8ad221d"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3b59cb145359a5f3c1360f84c26091cc"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "1469d93d763ae166c9161b9e611d736a"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "16e7595d9738fb616834867bfce50650"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d50a01c6c39712c5f5689e2e12ce56d8"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c470551d31f13b004b8df0fadf7b1082"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "546cebe8240473b9067d77b1e73f0d6a"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "9bb8a20a5e2c8a98860fc65324540c94"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "45fc87f148f3b4ae7b6b75586da37025"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1b4c0bf675a99c0cd0e8368815548ff3"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "6ca521f51ba3d708113d817b95922aa6"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "d38948c44febbb26a677b15dc8b691bb"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "c5ada97fad88dd5e5f6159b02e4af9f2"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "188b8079cd8fa755f24c1126916c9fdc"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "26c1787fa658e73f3ff29e62da4ef5e3"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "05d78a9452b47c2282e3094f6808cb20"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "fc9a874e7cf6a81b1af4240b007cfa66"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "12be63e3ffbcb107d6a4321bbe471158"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "edd51b2f71391d694d08ac588fa0fe4b"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "8d1ea58909ab251bdaa5e6a47e9e5e7c"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "b2cbfb366f7d92fad6c5842e208b5e10"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "c621d64dd47ae8906bc02ae03151cb7f"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "4952d4c7d963ba963c88b67ef35fdeb9"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "c18ceafb27e183c674d42854be7a6f8b"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "5cfd2120d0c42e087ea7ff446ef26e76"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5c38216022eda052326434dd748b132f"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "5c8a9ba9e81fc11cc4d58ec9ecf10c27"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "fcc2b896d9e6be08ec775d4d47321f31"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "e729b04b35469a70f432074077221419"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e0d4177b685dd557f9ab6ac55613fcf2"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "52c07274a578a32208fdec9bf74d588f"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "f74d437d00f777b0b33e70da165fcd6a"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "5ece0f6862e393595eeb2196e42f4069"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "7114cf51bfdf05b0b92524ecb3aee10d"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "fac02bf0dcb7572fb443b0d256d7cc56"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "7972c8c587aeb14e4fbfdc9e32016f21"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "05d5c2ca8ff615dd0981e44bed495d15"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ee72627e8c3ba622691ebc1f59dee36a"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d471b5765e36a9b3e253cfd0b6260e7a"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "4bbfd95cd338662510f285ae6e846af5"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "69f4848aef2979d706382b3c3e34d429"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3c4d4dcbf6e321a0647be13366a86075"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "896a8920caaa46fdccd8b3c2952e1e3e"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "29e636180a9cadfb645c3ff141152e4c"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "68543852537255de0f59460b3b5b320a"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "cc0e894779991be6e3c1f82c31681a55"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.28.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "08321ef408d9badb99c9045c1683b9d7"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.29.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "135a27d3a9626df0a13d070a6104e36c"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d4a4158c02ad72cca6541654cc8960a7"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.29.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "47784749f7911a10c61a7d19b0bce23a"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "583dc8ed6fb3b9f12eb8a157e6a31888"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.30.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "01d82109e2b053392c58c112837aa3e1"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d5c20b92279f030e665f87498574c552"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4616c54b751908ccb1ac86edaf98eb11"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b0580b4c326008dd79b4ed2a69b49eef"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.31.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "076e5da95262790a5d3f1426bfb6cb1d"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "d2f6104d4972749df63e01bbc8fa4626"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1ed0af9be86b3f82693a098876ec4054"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f1731442dbf7b6836eea60e7e5341c74"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.32.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "8a3a2769015eeda747c6dab404627649"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "a03f74a04f9262079f9d4a494589d4f9"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f75c890b4feee824e3c706fec9e319ce"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "d5546f2cd028c025073bb0c62beb07ba"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "ab957e4cb6263347f7d3c207e3427c62"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "75f63149319fd2dee3bc4b1fa2fa9e00"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3fcc1802ee292e02a7af6e0768afc3ee"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "ec6f8aa47f6fc1e95c15f411a99ff467"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "93b2e92475ecb2964c181b8c522e072d"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "5c6b90cce7bcb06f02e5b66eb5d0cb4b"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "604da35f9395a3d2d46afc1de505cffc"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "58463736c560a47373eaf9600bfdb157"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "7a48ef08092fc550e5bda22fde150eb1"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "6f19fe3de5ac81e68742b58d8495a2d2"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "f7d6a10c272a5df1b280352813835523"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "3ca029383ab487142802f4042c7cf851"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "7e98d663037efc91cd25ea1bda673a4a"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "45ffdc357bf418534543301eef9b3614"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6b6e399c0b6c67e00bd528b16e12b4ba"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ea1ef05775ff2fc9eb91f2c32de3a145"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "f797387bcca0e89d5a12ab5e6569a9d1"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "b9e9731eee6323be581ed42eb04eec4b"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1f10cadffe8744c670137d0bd8cba5c8"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "58032f77710c05aa28ebc0922a055641"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "73d25a7990b064d75d3940f8c828da7a"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ea0b36a56939d2eded2d2ece611bc8fe"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.33.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "468de7fe1d77509417133e0d49d1c042"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.34.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "3d5600b51ab16792c6a937f4f641ba25"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "132779a872e73947380f72cf48fc06af"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b528191419d940cf7fe619d5088ed136"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2dd22a63aef81eb91ab5dd0e3335e202"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.35.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "0d986048289c70a931cad927ffd7078c"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "c2d37cb2bd7eefe6ac17b98e0c990d20"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "2192ca6c2f5cc3023848c9a6576a1247"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "869b0c193e7916866f27f663ba3e9660"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.36.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "9adbee946b8f33957ea9cfaf7a53be9f"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "ec7d72a81fdde034f3b413b16de40431"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.36.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a49d10483400832264db2a92069e6e62"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "2592c76625234644b3964b28dc488f97"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.37.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "e7f2978264a91a83c0ff74573f61edb9"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "af23706921ecca51196c2c13c0e7c28d"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.37.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "5d86f0c588d4dd7bf4a123611e937b93"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "f620578b0a699d4d82592520f1f52950"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "842314b084d3dd62d928ded98968e224"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.38.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1dee23de3fa38e9aa1eedbde4a11fb46"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "59d1abc636058cb4135de1a0dbdab0ca"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.38.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "3a282bceb506182c3e9ae3fc7ff44a4e"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.39.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c2cb53a6d26354ccc2c0c9773d3c3e26"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "dc27334ca214721382370a88c5a9affc"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.39.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b2be639cdca8d9d084145e185c95d7b0"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "9492eba46a87dc48fae76324e919ff55"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.40.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "4dc90d2c33953941d6a7fe2ec2cfd7e1"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "586efb33c244d2f305a84c0ebe6d7c32"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.40.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "4eb6bf4f0a27244e3239739a29e8bf53"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "538d6c66016a52cbf34dd186dd1bbfa4"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.41.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "e326d4b86bd1443ef3e68d08a3346a3e"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "b32f0d441ffc3b46bd09bb3d4650800f"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.41.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ecca691dbbca4299299804138aacbe17"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ffebf087642c164f8be9aaef00589f47"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.42.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "bb48ee3a23058d83cb8b8ccbc56bba25"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "388345b13cd50c0200f13de5a43e3e9a"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.42.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e9b75d26352670a40f87247ee820dafc"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ecfd9294bd47734f2902651909c6ab09"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "5464b8e45a4b216e23a8e599a6246f00"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.43.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "9595dbb3f9a60cc90539eabcabdcb914"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "8a63ce0ac904e74a0f732c069b763f16"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.43.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "f310e2ff59128e9c169fc020668533dd"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.44.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "4dbeedde5619d1fb6d11effdf24fde83"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "4807437c8a9bed455068e281c4aeb899"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.44.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "b1f9e05f9a849f05fdf91e05f34a85a2"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "778870337876ee649f0f02ea787a5ea4"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.45.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "64e3c421234fb6370434664d0e8ac2df"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "08cbd0bd9dc093fba6be11b6e211dd22"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.45.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d7b3d79282204719afaef5ad03fa5cc3"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "7589121c42cfea25580df5cb35dbe3f5"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.46.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "fac93ee37d4966f8f7bed7baa0d77628"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "87b7da44649bc6767a3323d17c3846ff"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.46.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "60516058dd9cf879b9ca8cda82223623"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "19807242e41bd91bbf3eec7191427a84"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.47.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "39603864d378d3039e8bd56b96b9ba3c"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "19729fd254d74a6d6ebe7cccfb537607"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.47.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "32ef9c6d917e76bea36db52e89ca0e3a"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c8c55042629e486107c3f5147beab130"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.48.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "7082a8710212efa0919c70c5cd85e36b"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.48.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "6ce012223592455e085628da5152a287"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.48.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5dfc357b6cbfac9e9931f26a7cec3653"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.48.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c39efd7684406799a10dc3b5cf43acc9"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.49.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "d590f2d9de53027aafbfbb438ff5d0de"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.49.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "67f93d0184da1a9eee0988388e56b388"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.49.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "e112a3cd9c9f15178d698fa982a9212e"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.49.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "8a5c44176e9d36a912e3eef33394d5f8"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.50.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "eb1f92c2f4d2df62e0e25ef202f6a71b"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.50.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "25fd1c2497046ff741da909e549a8003"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.50.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a7968fb24c9e1efb26125c3cb1157687"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.50.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c90952a99572d8ea4c966f680ce5ea3a"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.51.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "3506ba9a7341df82ccebaa702e3e31b4"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.51.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "bb4cb4411f9699ba9edd42a14fe63dcc"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.51.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "3d844ee9ead864f8e7ba3f7d79a2b067"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.51.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c5bd9fd77f1ee2a466538f6cccc4abb3"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.52.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "0a07afe99c02acc6623314f98e0e7042"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.52.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "727ded046641d0d7291ae1f6dd7cca15"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.52.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "d2a6b8e3dab3fdaa3c3b94682ad971d8"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.52.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e69b713c7ab4cb9a0804aa5d8512a944"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.53.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "7ec40c8bf46499301bba07fb92ce64f6"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.53.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "458c5049fb5a98958bc2789fd8a4661b"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.53.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "846c92564124f3de0da9c1fe9c222c9a"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.53.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "5a13bbc57d9fb9cedd7f8d58ef997cd3"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.54.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "08910347e75d55743bfba18270d4da5f"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.54.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "94145c2ab896c5e42bdfa716a60d20bf"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.54.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "062b832f222716190f719186bed31161"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.54.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "c45f9599200903b95b612ea2930ffd79"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.55.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c4b10136d11eef3df3941315dd57eb86"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.55.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "b3a7f6c44bfaf5bd11100daa3935cf91"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.55.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "734d1983c20928b5c14074faa06b0eaf"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.55.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "418af5d0f9eb4c38d241baca02f4f1a5"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.56.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "c342fc790357827dfc56071aad6d0b11"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.56.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "b9f762474812f9ab39c37be73882ae17"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.56.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "a33eeba8c40e0d6d0d14fc9f5f3180a9"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.56.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "8da40d4c433458bb42e523c8ebc0febf"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.57.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "cfbd4d64c9591a96c1f791484331d9c5"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.57.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "6501aad225757bd2858110ee07fa6d1d"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.57.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "134d4486a8a90d47c5b84628347e7cc8"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.57.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "490e63dd84e3e19fc59ef6b4fea67900"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.58.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "82d1aafa5fd2c2cde3816dc1cf158b5d"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.58.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "81eaa978116ae4062ab46264a84fca05"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.58.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "b3e348383eb3dc470f594bd0f7ef9164"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.58.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "d14fe54e17c9c10414b9f1e9bd1f525e"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.59.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "f308449b6c35588f8f4bf69d05f7d4cf"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.59.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "1ce849d2b66bb5fbf05b17f37836041c"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.59.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "988d3abb58e781a668859bcd8be3e0c6"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.59.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "848c8c3f55ca6846f5c19919b2d3c73e"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.60.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "4011187c2b3c9702ec28b7c22d500325"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.60.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "1602243caa912687bd3a27f9ebfea73e"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.60.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "1ba75f1772670be818f3941927a26f74"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.60.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ee8d0b6f2a0c66c627f33e747692139d"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.61.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "b4bf9f0a00431e29f0a0959bf57047e6"
},
{
"dataPath": "params_shard_249.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.61.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "4ac1f798741e90add03065eb1803b681"
},
{
"dataPath": "params_shard_250.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.61.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "ef70bcd2d50ae806e11b4f1852e0fa41"
},
{
"dataPath": "params_shard_251.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.61.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "5afd078c2166291b9e066a8240c50b60"
},
{
"dataPath": "params_shard_252.bin",
"format": "raw-shard",
"nbytes": 283115520,
"records": [
{
"name": "model.layers.62.mlp.down_proj.weight",
"shape": [
5120,
27648
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 283115520,
"byteOffset": 0
}
],
"md5sum": "78a19235c7baf9cac6a8da2cb2937a4f"
},
{
"dataPath": "params_shard_253.bin",
"format": "raw-shard",
"nbytes": 566231040,
"records": [
{
"name": "model.layers.62.mlp.gate_up_proj.weight",
"shape": [
55296,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 566231040,
"byteOffset": 0
}
],
"md5sum": "0d78ce4c346fc113a819b37d2eafe4d3"
},
{
"dataPath": "params_shard_254.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.62.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "cfacd9675e9fae450c86cdb490ed189f"
},
{
"dataPath": "params_shard_255.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.62.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "e1eb88ea9c0dc29197fd966114235f10"
},
{
"dataPath": "params_shard_256.bin",
"format": "raw-shard",
"nbytes": 73400320,
"records": [
{
"name": "model.layers.63.self_attn.c_attn.weight",
"shape": [
7168,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73400320,
"byteOffset": 0
}
],
"md5sum": "193101b6973c37967f9be0ced8cce1a6"
},
{
"dataPath": "params_shard_257.bin",
"format": "raw-shard",
"nbytes": 52428800,
"records": [
{
"name": "model.layers.63.self_attn.o_proj.weight",
"shape": [
5120,
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 52428800,
"byteOffset": 0
}
],
"md5sum": "ecfeee3d0547a155fe21b45f886697f4"
},
{
"dataPath": "params_shard_258.bin",
"format": "raw-shard",
"nbytes": 2238464,
"records": [
{
"name": "model.layers.63.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 0
},
{
"name": "model.layers.63.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 10240
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 20480
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 30720
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 40960
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 51200
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 65536
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 75776
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 86016
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 100352
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 110592
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 120832
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 135168
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 149504
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 159744
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 169984
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 184320
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 194560
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 204800
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 219136
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 229376
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 239616
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 253952
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 268288
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 278528
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 288768
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 299008
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 309248
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 323584
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 333824
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 344064
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 354304
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 364544
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 378880
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 389120
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 399360
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 413696
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 423936
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 434176
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 448512
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 458752
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 468992
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 483328
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 497664
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 507904
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 518144
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 528384
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 538624
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 552960
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 563200
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 573440
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 587776
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 598016
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 608256
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 622592
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 632832
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 643072
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 657408
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 671744
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 681984
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 692224
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 702464
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 712704
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 727040
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 737280
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 747520
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 761856
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 772096
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 782336
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 796672
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 806912
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 817152
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 831488
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 845824
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 856064
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 866304
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 876544
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 886784
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 901120
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 911360
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 921600
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 935936
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 946176
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 956416
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 970752
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 980992
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 991232
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1005568
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1019904
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1030144
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1040384
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1050624
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1060864
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1075200
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1085440
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1095680
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1110016
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1120256
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1130496
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1144832
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1155072
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1165312
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1179648
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1193984
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1204224
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1214464
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1224704
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1234944
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1249280
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1259520
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1269760
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1284096
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1294336
},
{
"name": "model.layers.36.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1304576
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1318912
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1329152
},
{
"name": "model.layers.37.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1339392
},
{
"name": "model.layers.38.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1353728
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1368064
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1378304
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1388544
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1398784
},
{
"name": "model.layers.39.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1409024
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1423360
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1433600
},
{
"name": "model.layers.40.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1443840
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1458176
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1468416
},
{
"name": "model.layers.41.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1478656
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1492992
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1503232
},
{
"name": "model.layers.42.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1513472
},
{
"name": "model.layers.43.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1527808
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1542144
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1552384
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1562624
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1572864
},
{
"name": "model.layers.44.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1583104
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1597440
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1607680
},
{
"name": "model.layers.45.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1617920
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1632256
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1642496
},
{
"name": "model.layers.46.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1652736
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1667072
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1677312
},
{
"name": "model.layers.47.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1687552
},
{
"name": "model.layers.48.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1701888
},
{
"name": "model.layers.48.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1716224
},
{
"name": "model.layers.48.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1726464
},
{
"name": "model.layers.49.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1736704
},
{
"name": "model.layers.49.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1746944
},
{
"name": "model.layers.49.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1757184
},
{
"name": "model.layers.50.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1771520
},
{
"name": "model.layers.50.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1781760
},
{
"name": "model.layers.50.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1792000
},
{
"name": "model.layers.51.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1806336
},
{
"name": "model.layers.51.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1816576
},
{
"name": "model.layers.51.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1826816
},
{
"name": "model.layers.52.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1841152
},
{
"name": "model.layers.52.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1851392
},
{
"name": "model.layers.52.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1861632
},
{
"name": "model.layers.53.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1875968
},
{
"name": "model.layers.53.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1890304
},
{
"name": "model.layers.53.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1900544
},
{
"name": "model.layers.54.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1910784
},
{
"name": "model.layers.54.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1921024
},
{
"name": "model.layers.54.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1931264
},
{
"name": "model.layers.55.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1945600
},
{
"name": "model.layers.55.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1955840
},
{
"name": "model.layers.55.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 1966080
},
{
"name": "model.layers.56.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1980416
},
{
"name": "model.layers.56.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 1990656
},
{
"name": "model.layers.56.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2000896
},
{
"name": "model.layers.57.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2015232
},
{
"name": "model.layers.57.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2025472
},
{
"name": "model.layers.57.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2035712
},
{
"name": "model.layers.58.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2050048
},
{
"name": "model.layers.58.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2064384
},
{
"name": "model.layers.58.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2074624
},
{
"name": "model.layers.59.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2084864
},
{
"name": "model.layers.59.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2095104
},
{
"name": "model.layers.59.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2105344
},
{
"name": "model.layers.60.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2119680
},
{
"name": "model.layers.60.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2129920
},
{
"name": "model.layers.60.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2140160
},
{
"name": "model.layers.61.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2154496
},
{
"name": "model.layers.61.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2164736
},
{
"name": "model.layers.61.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2174976
},
{
"name": "model.layers.62.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2189312
},
{
"name": "model.layers.62.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 2199552
},
{
"name": "model.layers.62.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2209792
},
{
"name": "model.layers.63.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 2224128
}
],
"md5sum": "b0f5a80d026b3517cfd2a7df2fe6b1bf"
}
]
}