diff --git a/.gitattributes b/.gitattributes
index e2edc667ad8fc054852644470bb308cf8d6080ae..8d19d7f44d66e732aa90c30d2d42462030fda4e4 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -36,3 +36,74 @@ logs/logs/xp3capmixnewcodelonglossseq/main_log.txt filter=lfs diff=lfs merge=lfs
 logs/logs/xp3mt/main_log.txt filter=lfs diff=lfs merge=lfs -text
 logs/logs/xp3zhmt/main_log.txt filter=lfs diff=lfs merge=lfs -text
 logs/logs/xp3zzlossseq/main_log.txt filter=lfs diff=lfs merge=lfs -text
+model_00021-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00027-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00005-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00031-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00038-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00034-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00060-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00020-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00037-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00045-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00043-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00006-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00070-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00015-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00025-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00065-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00062-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00003-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00033-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00022-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00019-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00016-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00056-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00004-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00053-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00058-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00047-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00055-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00061-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00041-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00011-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00032-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00046-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00013-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00001-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00042-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00008-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00068-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00017-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00048-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00002-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00030-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00054-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00069-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00050-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00010-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00063-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00014-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00059-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00052-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00067-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00023-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00049-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00018-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00044-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00035-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00057-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00028-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00051-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00024-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00064-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00029-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00039-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00071-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00009-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00007-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00026-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00012-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00040-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00036-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
+model_00066-of-00072.safetensors filter=lfs diff=lfs merge=lfs -text
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..89e9ab80d83ceb806a7497d5da7071c38ec0405f
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1 @@
+{"metadata": {"total_size": 352494542848}, "weight_map": {"h.0.input_layernorm.bias": "model_00002-of-00072.safetensors", "h.0.input_layernorm.weight": "model_00002-of-00072.safetensors", "h.0.mlp.dense_4h_to_h.bias": "model_00002-of-00072.safetensors", "h.0.mlp.dense_4h_to_h.weight": "model_00002-of-00072.safetensors", "h.0.mlp.dense_h_to_4h.bias": "model_00002-of-00072.safetensors", "h.0.mlp.dense_h_to_4h.weight": "model_00002-of-00072.safetensors", "h.0.post_attention_layernorm.bias": "model_00002-of-00072.safetensors", "h.0.post_attention_layernorm.weight": "model_00002-of-00072.safetensors", "h.0.self_attention.dense.bias": "model_00002-of-00072.safetensors", "h.0.self_attention.dense.weight": "model_00002-of-00072.safetensors", "h.0.self_attention.query_key_value.bias": "model_00002-of-00072.safetensors", "h.0.self_attention.query_key_value.weight": "model_00002-of-00072.safetensors", "h.1.input_layernorm.bias": "model_00003-of-00072.safetensors", "h.1.input_layernorm.weight": "model_00003-of-00072.safetensors", "h.1.mlp.dense_4h_to_h.bias": "model_00003-of-00072.safetensors", "h.1.mlp.dense_4h_to_h.weight": "model_00003-of-00072.safetensors", "h.1.mlp.dense_h_to_4h.bias": "model_00003-of-00072.safetensors", "h.1.mlp.dense_h_to_4h.weight": "model_00003-of-00072.safetensors", "h.1.post_attention_layernorm.bias": "model_00003-of-00072.safetensors", "h.1.post_attention_layernorm.weight": "model_00003-of-00072.safetensors", "h.1.self_attention.dense.bias": "model_00003-of-00072.safetensors", "h.1.self_attention.dense.weight": "model_00003-of-00072.safetensors", "h.1.self_attention.query_key_value.bias": "model_00003-of-00072.safetensors", "h.1.self_attention.query_key_value.weight": "model_00003-of-00072.safetensors", "h.10.input_layernorm.bias": "model_00012-of-00072.safetensors", "h.10.input_layernorm.weight": "model_00012-of-00072.safetensors", "h.10.mlp.dense_4h_to_h.bias": "model_00012-of-00072.safetensors", "h.10.mlp.dense_4h_to_h.weight": "model_00012-of-00072.safetensors", "h.10.mlp.dense_h_to_4h.bias": "model_00012-of-00072.safetensors", "h.10.mlp.dense_h_to_4h.weight": "model_00012-of-00072.safetensors", "h.10.post_attention_layernorm.bias": "model_00012-of-00072.safetensors", "h.10.post_attention_layernorm.weight": "model_00012-of-00072.safetensors", "h.10.self_attention.dense.bias": "model_00012-of-00072.safetensors", "h.10.self_attention.dense.weight": "model_00012-of-00072.safetensors", "h.10.self_attention.query_key_value.bias": "model_00012-of-00072.safetensors", "h.10.self_attention.query_key_value.weight": "model_00012-of-00072.safetensors", "h.11.input_layernorm.bias": "model_00013-of-00072.safetensors", "h.11.input_layernorm.weight": "model_00013-of-00072.safetensors", "h.11.mlp.dense_4h_to_h.bias": "model_00013-of-00072.safetensors", "h.11.mlp.dense_4h_to_h.weight": "model_00013-of-00072.safetensors", "h.11.mlp.dense_h_to_4h.bias": "model_00013-of-00072.safetensors", "h.11.mlp.dense_h_to_4h.weight": "model_00013-of-00072.safetensors", "h.11.post_attention_layernorm.bias": "model_00013-of-00072.safetensors", "h.11.post_attention_layernorm.weight": "model_00013-of-00072.safetensors", "h.11.self_attention.dense.bias": "model_00013-of-00072.safetensors", "h.11.self_attention.dense.weight": "model_00013-of-00072.safetensors", "h.11.self_attention.query_key_value.bias": "model_00013-of-00072.safetensors", "h.11.self_attention.query_key_value.weight": "model_00013-of-00072.safetensors", "h.12.input_layernorm.bias": "model_00014-of-00072.safetensors", "h.12.input_layernorm.weight": "model_00014-of-00072.safetensors", "h.12.mlp.dense_4h_to_h.bias": "model_00014-of-00072.safetensors", "h.12.mlp.dense_4h_to_h.weight": "model_00014-of-00072.safetensors", "h.12.mlp.dense_h_to_4h.bias": "model_00014-of-00072.safetensors", "h.12.mlp.dense_h_to_4h.weight": "model_00014-of-00072.safetensors", "h.12.post_attention_layernorm.bias": "model_00014-of-00072.safetensors", "h.12.post_attention_layernorm.weight": "model_00014-of-00072.safetensors", "h.12.self_attention.dense.bias": "model_00014-of-00072.safetensors", "h.12.self_attention.dense.weight": "model_00014-of-00072.safetensors", "h.12.self_attention.query_key_value.bias": "model_00014-of-00072.safetensors", "h.12.self_attention.query_key_value.weight": "model_00014-of-00072.safetensors", "h.13.input_layernorm.bias": "model_00015-of-00072.safetensors", "h.13.input_layernorm.weight": "model_00015-of-00072.safetensors", "h.13.mlp.dense_4h_to_h.bias": "model_00015-of-00072.safetensors", "h.13.mlp.dense_4h_to_h.weight": "model_00015-of-00072.safetensors", "h.13.mlp.dense_h_to_4h.bias": "model_00015-of-00072.safetensors", "h.13.mlp.dense_h_to_4h.weight": "model_00015-of-00072.safetensors", "h.13.post_attention_layernorm.bias": "model_00015-of-00072.safetensors", "h.13.post_attention_layernorm.weight": "model_00015-of-00072.safetensors", "h.13.self_attention.dense.bias": "model_00015-of-00072.safetensors", "h.13.self_attention.dense.weight": "model_00015-of-00072.safetensors", "h.13.self_attention.query_key_value.bias": "model_00015-of-00072.safetensors", "h.13.self_attention.query_key_value.weight": "model_00015-of-00072.safetensors", "h.14.input_layernorm.bias": "model_00016-of-00072.safetensors", "h.14.input_layernorm.weight": "model_00016-of-00072.safetensors", "h.14.mlp.dense_4h_to_h.bias": "model_00016-of-00072.safetensors", "h.14.mlp.dense_4h_to_h.weight": "model_00016-of-00072.safetensors", "h.14.mlp.dense_h_to_4h.bias": "model_00016-of-00072.safetensors", "h.14.mlp.dense_h_to_4h.weight": "model_00016-of-00072.safetensors", "h.14.post_attention_layernorm.bias": "model_00016-of-00072.safetensors", "h.14.post_attention_layernorm.weight": "model_00016-of-00072.safetensors", "h.14.self_attention.dense.bias": "model_00016-of-00072.safetensors", "h.14.self_attention.dense.weight": "model_00016-of-00072.safetensors", "h.14.self_attention.query_key_value.bias": "model_00016-of-00072.safetensors", "h.14.self_attention.query_key_value.weight": "model_00016-of-00072.safetensors", "h.15.input_layernorm.bias": "model_00017-of-00072.safetensors", "h.15.input_layernorm.weight": "model_00017-of-00072.safetensors", "h.15.mlp.dense_4h_to_h.bias": "model_00017-of-00072.safetensors", "h.15.mlp.dense_4h_to_h.weight": "model_00017-of-00072.safetensors", "h.15.mlp.dense_h_to_4h.bias": "model_00017-of-00072.safetensors", "h.15.mlp.dense_h_to_4h.weight": "model_00017-of-00072.safetensors", "h.15.post_attention_layernorm.bias": "model_00017-of-00072.safetensors", "h.15.post_attention_layernorm.weight": "model_00017-of-00072.safetensors", "h.15.self_attention.dense.bias": "model_00017-of-00072.safetensors", "h.15.self_attention.dense.weight": "model_00017-of-00072.safetensors", "h.15.self_attention.query_key_value.bias": "model_00017-of-00072.safetensors", "h.15.self_attention.query_key_value.weight": "model_00017-of-00072.safetensors", "h.16.input_layernorm.bias": "model_00018-of-00072.safetensors", "h.16.input_layernorm.weight": "model_00018-of-00072.safetensors", "h.16.mlp.dense_4h_to_h.bias": "model_00018-of-00072.safetensors", "h.16.mlp.dense_4h_to_h.weight": "model_00018-of-00072.safetensors", "h.16.mlp.dense_h_to_4h.bias": "model_00018-of-00072.safetensors", "h.16.mlp.dense_h_to_4h.weight": "model_00018-of-00072.safetensors", "h.16.post_attention_layernorm.bias": "model_00018-of-00072.safetensors", "h.16.post_attention_layernorm.weight": "model_00018-of-00072.safetensors", "h.16.self_attention.dense.bias": "model_00018-of-00072.safetensors", "h.16.self_attention.dense.weight": "model_00018-of-00072.safetensors", "h.16.self_attention.query_key_value.bias": "model_00018-of-00072.safetensors", "h.16.self_attention.query_key_value.weight": "model_00018-of-00072.safetensors", "h.17.input_layernorm.bias": "model_00019-of-00072.safetensors", "h.17.input_layernorm.weight": "model_00019-of-00072.safetensors", "h.17.mlp.dense_4h_to_h.bias": "model_00019-of-00072.safetensors", "h.17.mlp.dense_4h_to_h.weight": "model_00019-of-00072.safetensors", "h.17.mlp.dense_h_to_4h.bias": "model_00019-of-00072.safetensors", "h.17.mlp.dense_h_to_4h.weight": "model_00019-of-00072.safetensors", "h.17.post_attention_layernorm.bias": "model_00019-of-00072.safetensors", "h.17.post_attention_layernorm.weight": "model_00019-of-00072.safetensors", "h.17.self_attention.dense.bias": "model_00019-of-00072.safetensors", "h.17.self_attention.dense.weight": "model_00019-of-00072.safetensors", "h.17.self_attention.query_key_value.bias": "model_00019-of-00072.safetensors", "h.17.self_attention.query_key_value.weight": "model_00019-of-00072.safetensors", "h.18.input_layernorm.bias": "model_00020-of-00072.safetensors", "h.18.input_layernorm.weight": "model_00020-of-00072.safetensors", "h.18.mlp.dense_4h_to_h.bias": "model_00020-of-00072.safetensors", "h.18.mlp.dense_4h_to_h.weight": "model_00020-of-00072.safetensors", "h.18.mlp.dense_h_to_4h.bias": "model_00020-of-00072.safetensors", "h.18.mlp.dense_h_to_4h.weight": "model_00020-of-00072.safetensors", "h.18.post_attention_layernorm.bias": "model_00020-of-00072.safetensors", "h.18.post_attention_layernorm.weight": "model_00020-of-00072.safetensors", "h.18.self_attention.dense.bias": "model_00020-of-00072.safetensors", "h.18.self_attention.dense.weight": "model_00020-of-00072.safetensors", "h.18.self_attention.query_key_value.bias": "model_00020-of-00072.safetensors", "h.18.self_attention.query_key_value.weight": "model_00020-of-00072.safetensors", "h.19.input_layernorm.bias": "model_00021-of-00072.safetensors", "h.19.input_layernorm.weight": "model_00021-of-00072.safetensors", "h.19.mlp.dense_4h_to_h.bias": "model_00021-of-00072.safetensors", "h.19.mlp.dense_4h_to_h.weight": "model_00021-of-00072.safetensors", "h.19.mlp.dense_h_to_4h.bias": "model_00021-of-00072.safetensors", "h.19.mlp.dense_h_to_4h.weight": "model_00021-of-00072.safetensors", "h.19.post_attention_layernorm.bias": "model_00021-of-00072.safetensors", "h.19.post_attention_layernorm.weight": "model_00021-of-00072.safetensors", "h.19.self_attention.dense.bias": "model_00021-of-00072.safetensors", "h.19.self_attention.dense.weight": "model_00021-of-00072.safetensors", "h.19.self_attention.query_key_value.bias": "model_00021-of-00072.safetensors", "h.19.self_attention.query_key_value.weight": "model_00021-of-00072.safetensors", "h.2.input_layernorm.bias": "model_00004-of-00072.safetensors", "h.2.input_layernorm.weight": "model_00004-of-00072.safetensors", "h.2.mlp.dense_4h_to_h.bias": "model_00004-of-00072.safetensors", "h.2.mlp.dense_4h_to_h.weight": "model_00004-of-00072.safetensors", "h.2.mlp.dense_h_to_4h.bias": "model_00004-of-00072.safetensors", "h.2.mlp.dense_h_to_4h.weight": "model_00004-of-00072.safetensors", "h.2.post_attention_layernorm.bias": "model_00004-of-00072.safetensors", "h.2.post_attention_layernorm.weight": "model_00004-of-00072.safetensors", "h.2.self_attention.dense.bias": "model_00004-of-00072.safetensors", "h.2.self_attention.dense.weight": "model_00004-of-00072.safetensors", "h.2.self_attention.query_key_value.bias": "model_00004-of-00072.safetensors", "h.2.self_attention.query_key_value.weight": "model_00004-of-00072.safetensors", "h.20.input_layernorm.bias": "model_00022-of-00072.safetensors", "h.20.input_layernorm.weight": "model_00022-of-00072.safetensors", "h.20.mlp.dense_4h_to_h.bias": "model_00022-of-00072.safetensors", "h.20.mlp.dense_4h_to_h.weight": "model_00022-of-00072.safetensors", "h.20.mlp.dense_h_to_4h.bias": "model_00022-of-00072.safetensors", "h.20.mlp.dense_h_to_4h.weight": "model_00022-of-00072.safetensors", "h.20.post_attention_layernorm.bias": "model_00022-of-00072.safetensors", "h.20.post_attention_layernorm.weight": "model_00022-of-00072.safetensors", "h.20.self_attention.dense.bias": "model_00022-of-00072.safetensors", "h.20.self_attention.dense.weight": "model_00022-of-00072.safetensors", "h.20.self_attention.query_key_value.bias": "model_00022-of-00072.safetensors", "h.20.self_attention.query_key_value.weight": "model_00022-of-00072.safetensors", "h.21.input_layernorm.bias": "model_00023-of-00072.safetensors", "h.21.input_layernorm.weight": "model_00023-of-00072.safetensors", "h.21.mlp.dense_4h_to_h.bias": "model_00023-of-00072.safetensors", "h.21.mlp.dense_4h_to_h.weight": "model_00023-of-00072.safetensors", "h.21.mlp.dense_h_to_4h.bias": "model_00023-of-00072.safetensors", "h.21.mlp.dense_h_to_4h.weight": "model_00023-of-00072.safetensors", "h.21.post_attention_layernorm.bias": "model_00023-of-00072.safetensors", "h.21.post_attention_layernorm.weight": "model_00023-of-00072.safetensors", "h.21.self_attention.dense.bias": "model_00023-of-00072.safetensors", "h.21.self_attention.dense.weight": "model_00023-of-00072.safetensors", "h.21.self_attention.query_key_value.bias": "model_00023-of-00072.safetensors", "h.21.self_attention.query_key_value.weight": "model_00023-of-00072.safetensors", "h.22.input_layernorm.bias": "model_00024-of-00072.safetensors", "h.22.input_layernorm.weight": "model_00024-of-00072.safetensors", "h.22.mlp.dense_4h_to_h.bias": "model_00024-of-00072.safetensors", "h.22.mlp.dense_4h_to_h.weight": "model_00024-of-00072.safetensors", "h.22.mlp.dense_h_to_4h.bias": "model_00024-of-00072.safetensors", "h.22.mlp.dense_h_to_4h.weight": "model_00024-of-00072.safetensors", "h.22.post_attention_layernorm.bias": "model_00024-of-00072.safetensors", "h.22.post_attention_layernorm.weight": "model_00024-of-00072.safetensors", "h.22.self_attention.dense.bias": "model_00024-of-00072.safetensors", "h.22.self_attention.dense.weight": "model_00024-of-00072.safetensors", "h.22.self_attention.query_key_value.bias": "model_00024-of-00072.safetensors", "h.22.self_attention.query_key_value.weight": "model_00024-of-00072.safetensors", "h.23.input_layernorm.bias": "model_00025-of-00072.safetensors", "h.23.input_layernorm.weight": "model_00025-of-00072.safetensors", "h.23.mlp.dense_4h_to_h.bias": "model_00025-of-00072.safetensors", "h.23.mlp.dense_4h_to_h.weight": "model_00025-of-00072.safetensors", "h.23.mlp.dense_h_to_4h.bias": "model_00025-of-00072.safetensors", "h.23.mlp.dense_h_to_4h.weight": "model_00025-of-00072.safetensors", "h.23.post_attention_layernorm.bias": "model_00025-of-00072.safetensors", "h.23.post_attention_layernorm.weight": "model_00025-of-00072.safetensors", "h.23.self_attention.dense.bias": "model_00025-of-00072.safetensors", "h.23.self_attention.dense.weight": "model_00025-of-00072.safetensors", "h.23.self_attention.query_key_value.bias": "model_00025-of-00072.safetensors", "h.23.self_attention.query_key_value.weight": "model_00025-of-00072.safetensors", "h.24.input_layernorm.bias": "model_00026-of-00072.safetensors", "h.24.input_layernorm.weight": "model_00026-of-00072.safetensors", "h.24.mlp.dense_4h_to_h.bias": "model_00026-of-00072.safetensors", "h.24.mlp.dense_4h_to_h.weight": "model_00026-of-00072.safetensors", "h.24.mlp.dense_h_to_4h.bias": "model_00026-of-00072.safetensors", "h.24.mlp.dense_h_to_4h.weight": "model_00026-of-00072.safetensors", "h.24.post_attention_layernorm.bias": "model_00026-of-00072.safetensors", "h.24.post_attention_layernorm.weight": "model_00026-of-00072.safetensors", "h.24.self_attention.dense.bias": "model_00026-of-00072.safetensors", "h.24.self_attention.dense.weight": "model_00026-of-00072.safetensors", "h.24.self_attention.query_key_value.bias": "model_00026-of-00072.safetensors", "h.24.self_attention.query_key_value.weight": "model_00026-of-00072.safetensors", "h.25.input_layernorm.bias": "model_00027-of-00072.safetensors", "h.25.input_layernorm.weight": "model_00027-of-00072.safetensors", "h.25.mlp.dense_4h_to_h.bias": "model_00027-of-00072.safetensors", "h.25.mlp.dense_4h_to_h.weight": "model_00027-of-00072.safetensors", "h.25.mlp.dense_h_to_4h.bias": "model_00027-of-00072.safetensors", "h.25.mlp.dense_h_to_4h.weight": "model_00027-of-00072.safetensors", "h.25.post_attention_layernorm.bias": "model_00027-of-00072.safetensors", "h.25.post_attention_layernorm.weight": "model_00027-of-00072.safetensors", "h.25.self_attention.dense.bias": "model_00027-of-00072.safetensors", "h.25.self_attention.dense.weight": "model_00027-of-00072.safetensors", "h.25.self_attention.query_key_value.bias": "model_00027-of-00072.safetensors", "h.25.self_attention.query_key_value.weight": "model_00027-of-00072.safetensors", "h.26.input_layernorm.bias": "model_00028-of-00072.safetensors", "h.26.input_layernorm.weight": "model_00028-of-00072.safetensors", "h.26.mlp.dense_4h_to_h.bias": "model_00028-of-00072.safetensors", "h.26.mlp.dense_4h_to_h.weight": "model_00028-of-00072.safetensors", "h.26.mlp.dense_h_to_4h.bias": "model_00028-of-00072.safetensors", "h.26.mlp.dense_h_to_4h.weight": "model_00028-of-00072.safetensors", "h.26.post_attention_layernorm.bias": "model_00028-of-00072.safetensors", "h.26.post_attention_layernorm.weight": "model_00028-of-00072.safetensors", "h.26.self_attention.dense.bias": "model_00028-of-00072.safetensors", "h.26.self_attention.dense.weight": "model_00028-of-00072.safetensors", "h.26.self_attention.query_key_value.bias": "model_00028-of-00072.safetensors", "h.26.self_attention.query_key_value.weight": "model_00028-of-00072.safetensors", "h.27.input_layernorm.bias": "model_00029-of-00072.safetensors", "h.27.input_layernorm.weight": "model_00029-of-00072.safetensors", "h.27.mlp.dense_4h_to_h.bias": "model_00029-of-00072.safetensors", "h.27.mlp.dense_4h_to_h.weight": "model_00029-of-00072.safetensors", "h.27.mlp.dense_h_to_4h.bias": "model_00029-of-00072.safetensors", "h.27.mlp.dense_h_to_4h.weight": "model_00029-of-00072.safetensors", "h.27.post_attention_layernorm.bias": "model_00029-of-00072.safetensors", "h.27.post_attention_layernorm.weight": "model_00029-of-00072.safetensors", "h.27.self_attention.dense.bias": "model_00029-of-00072.safetensors", "h.27.self_attention.dense.weight": "model_00029-of-00072.safetensors", "h.27.self_attention.query_key_value.bias": "model_00029-of-00072.safetensors", "h.27.self_attention.query_key_value.weight": "model_00029-of-00072.safetensors", "h.28.input_layernorm.bias": "model_00030-of-00072.safetensors", "h.28.input_layernorm.weight": "model_00030-of-00072.safetensors", "h.28.mlp.dense_4h_to_h.bias": "model_00030-of-00072.safetensors", "h.28.mlp.dense_4h_to_h.weight": "model_00030-of-00072.safetensors", "h.28.mlp.dense_h_to_4h.bias": "model_00030-of-00072.safetensors", "h.28.mlp.dense_h_to_4h.weight": "model_00030-of-00072.safetensors", "h.28.post_attention_layernorm.bias": "model_00030-of-00072.safetensors", "h.28.post_attention_layernorm.weight": "model_00030-of-00072.safetensors", "h.28.self_attention.dense.bias": "model_00030-of-00072.safetensors", "h.28.self_attention.dense.weight": "model_00030-of-00072.safetensors", "h.28.self_attention.query_key_value.bias": "model_00030-of-00072.safetensors", "h.28.self_attention.query_key_value.weight": "model_00030-of-00072.safetensors", "h.29.input_layernorm.bias": "model_00031-of-00072.safetensors", "h.29.input_layernorm.weight": "model_00031-of-00072.safetensors", "h.29.mlp.dense_4h_to_h.bias": "model_00031-of-00072.safetensors", "h.29.mlp.dense_4h_to_h.weight": "model_00031-of-00072.safetensors", "h.29.mlp.dense_h_to_4h.bias": "model_00031-of-00072.safetensors", "h.29.mlp.dense_h_to_4h.weight": "model_00031-of-00072.safetensors", "h.29.post_attention_layernorm.bias": "model_00031-of-00072.safetensors", "h.29.post_attention_layernorm.weight": "model_00031-of-00072.safetensors", "h.29.self_attention.dense.bias": "model_00031-of-00072.safetensors", "h.29.self_attention.dense.weight": "model_00031-of-00072.safetensors", "h.29.self_attention.query_key_value.bias": "model_00031-of-00072.safetensors", "h.29.self_attention.query_key_value.weight": "model_00031-of-00072.safetensors", "h.3.input_layernorm.bias": "model_00005-of-00072.safetensors", "h.3.input_layernorm.weight": "model_00005-of-00072.safetensors", "h.3.mlp.dense_4h_to_h.bias": "model_00005-of-00072.safetensors", "h.3.mlp.dense_4h_to_h.weight": "model_00005-of-00072.safetensors", "h.3.mlp.dense_h_to_4h.bias": "model_00005-of-00072.safetensors", "h.3.mlp.dense_h_to_4h.weight": "model_00005-of-00072.safetensors", "h.3.post_attention_layernorm.bias": "model_00005-of-00072.safetensors", "h.3.post_attention_layernorm.weight": "model_00005-of-00072.safetensors", "h.3.self_attention.dense.bias": "model_00005-of-00072.safetensors", "h.3.self_attention.dense.weight": "model_00005-of-00072.safetensors", "h.3.self_attention.query_key_value.bias": "model_00005-of-00072.safetensors", "h.3.self_attention.query_key_value.weight": "model_00005-of-00072.safetensors", "h.30.input_layernorm.bias": "model_00032-of-00072.safetensors", "h.30.input_layernorm.weight": "model_00032-of-00072.safetensors", "h.30.mlp.dense_4h_to_h.bias": "model_00032-of-00072.safetensors", "h.30.mlp.dense_4h_to_h.weight": "model_00032-of-00072.safetensors", "h.30.mlp.dense_h_to_4h.bias": "model_00032-of-00072.safetensors", "h.30.mlp.dense_h_to_4h.weight": "model_00032-of-00072.safetensors", "h.30.post_attention_layernorm.bias": "model_00032-of-00072.safetensors", "h.30.post_attention_layernorm.weight": "model_00032-of-00072.safetensors", "h.30.self_attention.dense.bias": "model_00032-of-00072.safetensors", "h.30.self_attention.dense.weight": "model_00032-of-00072.safetensors", "h.30.self_attention.query_key_value.bias": "model_00032-of-00072.safetensors", "h.30.self_attention.query_key_value.weight": "model_00032-of-00072.safetensors", "h.31.input_layernorm.bias": "model_00033-of-00072.safetensors", "h.31.input_layernorm.weight": "model_00033-of-00072.safetensors", "h.31.mlp.dense_4h_to_h.bias": "model_00033-of-00072.safetensors", "h.31.mlp.dense_4h_to_h.weight": "model_00033-of-00072.safetensors", "h.31.mlp.dense_h_to_4h.bias": "model_00033-of-00072.safetensors", "h.31.mlp.dense_h_to_4h.weight": "model_00033-of-00072.safetensors", "h.31.post_attention_layernorm.bias": "model_00033-of-00072.safetensors", "h.31.post_attention_layernorm.weight": "model_00033-of-00072.safetensors", "h.31.self_attention.dense.bias": "model_00033-of-00072.safetensors", "h.31.self_attention.dense.weight": "model_00033-of-00072.safetensors", "h.31.self_attention.query_key_value.bias": "model_00033-of-00072.safetensors", "h.31.self_attention.query_key_value.weight": "model_00033-of-00072.safetensors", "h.32.input_layernorm.bias": "model_00034-of-00072.safetensors", "h.32.input_layernorm.weight": "model_00034-of-00072.safetensors", "h.32.mlp.dense_4h_to_h.bias": "model_00034-of-00072.safetensors", "h.32.mlp.dense_4h_to_h.weight": "model_00034-of-00072.safetensors", "h.32.mlp.dense_h_to_4h.bias": "model_00034-of-00072.safetensors", "h.32.mlp.dense_h_to_4h.weight": "model_00034-of-00072.safetensors", "h.32.post_attention_layernorm.bias": "model_00034-of-00072.safetensors", "h.32.post_attention_layernorm.weight": "model_00034-of-00072.safetensors", "h.32.self_attention.dense.bias": "model_00034-of-00072.safetensors", "h.32.self_attention.dense.weight": "model_00034-of-00072.safetensors", "h.32.self_attention.query_key_value.bias": "model_00034-of-00072.safetensors", "h.32.self_attention.query_key_value.weight": "model_00034-of-00072.safetensors", "h.33.input_layernorm.bias": "model_00035-of-00072.safetensors", "h.33.input_layernorm.weight": "model_00035-of-00072.safetensors", "h.33.mlp.dense_4h_to_h.bias": "model_00035-of-00072.safetensors", "h.33.mlp.dense_4h_to_h.weight": "model_00035-of-00072.safetensors", "h.33.mlp.dense_h_to_4h.bias": "model_00035-of-00072.safetensors", "h.33.mlp.dense_h_to_4h.weight": "model_00035-of-00072.safetensors", "h.33.post_attention_layernorm.bias": "model_00035-of-00072.safetensors", "h.33.post_attention_layernorm.weight": "model_00035-of-00072.safetensors", "h.33.self_attention.dense.bias": "model_00035-of-00072.safetensors", "h.33.self_attention.dense.weight": "model_00035-of-00072.safetensors", "h.33.self_attention.query_key_value.bias": "model_00035-of-00072.safetensors", "h.33.self_attention.query_key_value.weight": "model_00035-of-00072.safetensors", "h.34.input_layernorm.bias": "model_00036-of-00072.safetensors", "h.34.input_layernorm.weight": "model_00036-of-00072.safetensors", "h.34.mlp.dense_4h_to_h.bias": "model_00036-of-00072.safetensors", "h.34.mlp.dense_4h_to_h.weight": "model_00036-of-00072.safetensors", "h.34.mlp.dense_h_to_4h.bias": "model_00036-of-00072.safetensors", "h.34.mlp.dense_h_to_4h.weight": "model_00036-of-00072.safetensors", "h.34.post_attention_layernorm.bias": "model_00036-of-00072.safetensors", "h.34.post_attention_layernorm.weight": "model_00036-of-00072.safetensors", "h.34.self_attention.dense.bias": "model_00036-of-00072.safetensors", "h.34.self_attention.dense.weight": "model_00036-of-00072.safetensors", "h.34.self_attention.query_key_value.bias": "model_00036-of-00072.safetensors", "h.34.self_attention.query_key_value.weight": "model_00036-of-00072.safetensors", "h.35.input_layernorm.bias": "model_00037-of-00072.safetensors", "h.35.input_layernorm.weight": "model_00037-of-00072.safetensors", "h.35.mlp.dense_4h_to_h.bias": "model_00037-of-00072.safetensors", "h.35.mlp.dense_4h_to_h.weight": "model_00037-of-00072.safetensors", "h.35.mlp.dense_h_to_4h.bias": "model_00037-of-00072.safetensors", "h.35.mlp.dense_h_to_4h.weight": "model_00037-of-00072.safetensors", "h.35.post_attention_layernorm.bias": "model_00037-of-00072.safetensors", "h.35.post_attention_layernorm.weight": "model_00037-of-00072.safetensors", "h.35.self_attention.dense.bias": "model_00037-of-00072.safetensors", "h.35.self_attention.dense.weight": "model_00037-of-00072.safetensors", "h.35.self_attention.query_key_value.bias": "model_00037-of-00072.safetensors", "h.35.self_attention.query_key_value.weight": "model_00037-of-00072.safetensors", "h.36.input_layernorm.bias": "model_00038-of-00072.safetensors", "h.36.input_layernorm.weight": "model_00038-of-00072.safetensors", "h.36.mlp.dense_4h_to_h.bias": "model_00038-of-00072.safetensors", "h.36.mlp.dense_4h_to_h.weight": "model_00038-of-00072.safetensors", "h.36.mlp.dense_h_to_4h.bias": "model_00038-of-00072.safetensors", "h.36.mlp.dense_h_to_4h.weight": "model_00038-of-00072.safetensors", "h.36.post_attention_layernorm.bias": "model_00038-of-00072.safetensors", "h.36.post_attention_layernorm.weight": "model_00038-of-00072.safetensors", "h.36.self_attention.dense.bias": "model_00038-of-00072.safetensors", "h.36.self_attention.dense.weight": "model_00038-of-00072.safetensors", "h.36.self_attention.query_key_value.bias": "model_00038-of-00072.safetensors", "h.36.self_attention.query_key_value.weight": "model_00038-of-00072.safetensors", "h.37.input_layernorm.bias": "model_00039-of-00072.safetensors", "h.37.input_layernorm.weight": "model_00039-of-00072.safetensors", "h.37.mlp.dense_4h_to_h.bias": "model_00039-of-00072.safetensors", "h.37.mlp.dense_4h_to_h.weight": "model_00039-of-00072.safetensors", "h.37.mlp.dense_h_to_4h.bias": "model_00039-of-00072.safetensors", "h.37.mlp.dense_h_to_4h.weight": "model_00039-of-00072.safetensors", "h.37.post_attention_layernorm.bias": "model_00039-of-00072.safetensors", "h.37.post_attention_layernorm.weight": "model_00039-of-00072.safetensors", "h.37.self_attention.dense.bias": "model_00039-of-00072.safetensors", "h.37.self_attention.dense.weight": "model_00039-of-00072.safetensors", "h.37.self_attention.query_key_value.bias": "model_00039-of-00072.safetensors", "h.37.self_attention.query_key_value.weight": "model_00039-of-00072.safetensors", "h.38.input_layernorm.bias": "model_00040-of-00072.safetensors", "h.38.input_layernorm.weight": "model_00040-of-00072.safetensors", "h.38.mlp.dense_4h_to_h.bias": "model_00040-of-00072.safetensors", "h.38.mlp.dense_4h_to_h.weight": "model_00040-of-00072.safetensors", "h.38.mlp.dense_h_to_4h.bias": "model_00040-of-00072.safetensors", "h.38.mlp.dense_h_to_4h.weight": "model_00040-of-00072.safetensors", "h.38.post_attention_layernorm.bias": "model_00040-of-00072.safetensors", "h.38.post_attention_layernorm.weight": "model_00040-of-00072.safetensors", "h.38.self_attention.dense.bias": "model_00040-of-00072.safetensors", "h.38.self_attention.dense.weight": "model_00040-of-00072.safetensors", "h.38.self_attention.query_key_value.bias": "model_00040-of-00072.safetensors", "h.38.self_attention.query_key_value.weight": "model_00040-of-00072.safetensors", "h.39.input_layernorm.bias": "model_00041-of-00072.safetensors", "h.39.input_layernorm.weight": "model_00041-of-00072.safetensors", "h.39.mlp.dense_4h_to_h.bias": "model_00041-of-00072.safetensors", "h.39.mlp.dense_4h_to_h.weight": "model_00041-of-00072.safetensors", "h.39.mlp.dense_h_to_4h.bias": "model_00041-of-00072.safetensors", "h.39.mlp.dense_h_to_4h.weight": "model_00041-of-00072.safetensors", "h.39.post_attention_layernorm.bias": "model_00041-of-00072.safetensors", "h.39.post_attention_layernorm.weight": "model_00041-of-00072.safetensors", "h.39.self_attention.dense.bias": "model_00041-of-00072.safetensors", "h.39.self_attention.dense.weight": "model_00041-of-00072.safetensors", "h.39.self_attention.query_key_value.bias": "model_00041-of-00072.safetensors", "h.39.self_attention.query_key_value.weight": "model_00041-of-00072.safetensors", "h.4.input_layernorm.bias": "model_00006-of-00072.safetensors", "h.4.input_layernorm.weight": "model_00006-of-00072.safetensors", "h.4.mlp.dense_4h_to_h.bias": "model_00006-of-00072.safetensors", "h.4.mlp.dense_4h_to_h.weight": "model_00006-of-00072.safetensors", "h.4.mlp.dense_h_to_4h.bias": "model_00006-of-00072.safetensors", "h.4.mlp.dense_h_to_4h.weight": "model_00006-of-00072.safetensors", "h.4.post_attention_layernorm.bias": "model_00006-of-00072.safetensors", "h.4.post_attention_layernorm.weight": "model_00006-of-00072.safetensors", "h.4.self_attention.dense.bias": "model_00006-of-00072.safetensors", "h.4.self_attention.dense.weight": "model_00006-of-00072.safetensors", "h.4.self_attention.query_key_value.bias": "model_00006-of-00072.safetensors", "h.4.self_attention.query_key_value.weight": "model_00006-of-00072.safetensors", "h.40.input_layernorm.bias": "model_00042-of-00072.safetensors", "h.40.input_layernorm.weight": "model_00042-of-00072.safetensors", "h.40.mlp.dense_4h_to_h.bias": "model_00042-of-00072.safetensors", "h.40.mlp.dense_4h_to_h.weight": "model_00042-of-00072.safetensors", "h.40.mlp.dense_h_to_4h.bias": "model_00042-of-00072.safetensors", "h.40.mlp.dense_h_to_4h.weight": "model_00042-of-00072.safetensors", "h.40.post_attention_layernorm.bias": "model_00042-of-00072.safetensors", "h.40.post_attention_layernorm.weight": "model_00042-of-00072.safetensors", "h.40.self_attention.dense.bias": "model_00042-of-00072.safetensors", "h.40.self_attention.dense.weight": "model_00042-of-00072.safetensors", "h.40.self_attention.query_key_value.bias": "model_00042-of-00072.safetensors", "h.40.self_attention.query_key_value.weight": "model_00042-of-00072.safetensors", "h.41.input_layernorm.bias": "model_00043-of-00072.safetensors", "h.41.input_layernorm.weight": "model_00043-of-00072.safetensors", "h.41.mlp.dense_4h_to_h.bias": "model_00043-of-00072.safetensors", "h.41.mlp.dense_4h_to_h.weight": "model_00043-of-00072.safetensors", "h.41.mlp.dense_h_to_4h.bias": "model_00043-of-00072.safetensors", "h.41.mlp.dense_h_to_4h.weight": "model_00043-of-00072.safetensors", "h.41.post_attention_layernorm.bias": "model_00043-of-00072.safetensors", "h.41.post_attention_layernorm.weight": "model_00043-of-00072.safetensors", "h.41.self_attention.dense.bias": "model_00043-of-00072.safetensors", "h.41.self_attention.dense.weight": "model_00043-of-00072.safetensors", "h.41.self_attention.query_key_value.bias": "model_00043-of-00072.safetensors", "h.41.self_attention.query_key_value.weight": "model_00043-of-00072.safetensors", "h.42.input_layernorm.bias": "model_00044-of-00072.safetensors", "h.42.input_layernorm.weight": "model_00044-of-00072.safetensors", "h.42.mlp.dense_4h_to_h.bias": "model_00044-of-00072.safetensors", "h.42.mlp.dense_4h_to_h.weight": "model_00044-of-00072.safetensors", "h.42.mlp.dense_h_to_4h.bias": "model_00044-of-00072.safetensors", "h.42.mlp.dense_h_to_4h.weight": "model_00044-of-00072.safetensors", "h.42.post_attention_layernorm.bias": "model_00044-of-00072.safetensors", "h.42.post_attention_layernorm.weight": "model_00044-of-00072.safetensors", "h.42.self_attention.dense.bias": "model_00044-of-00072.safetensors", "h.42.self_attention.dense.weight": "model_00044-of-00072.safetensors", "h.42.self_attention.query_key_value.bias": "model_00044-of-00072.safetensors", "h.42.self_attention.query_key_value.weight": "model_00044-of-00072.safetensors", "h.43.input_layernorm.bias": "model_00045-of-00072.safetensors", "h.43.input_layernorm.weight": "model_00045-of-00072.safetensors", "h.43.mlp.dense_4h_to_h.bias": "model_00045-of-00072.safetensors", "h.43.mlp.dense_4h_to_h.weight": "model_00045-of-00072.safetensors", "h.43.mlp.dense_h_to_4h.bias": "model_00045-of-00072.safetensors", "h.43.mlp.dense_h_to_4h.weight": "model_00045-of-00072.safetensors", "h.43.post_attention_layernorm.bias": "model_00045-of-00072.safetensors", "h.43.post_attention_layernorm.weight": "model_00045-of-00072.safetensors", "h.43.self_attention.dense.bias": "model_00045-of-00072.safetensors", "h.43.self_attention.dense.weight": "model_00045-of-00072.safetensors", "h.43.self_attention.query_key_value.bias": "model_00045-of-00072.safetensors", "h.43.self_attention.query_key_value.weight": "model_00045-of-00072.safetensors", "h.44.input_layernorm.bias": "model_00046-of-00072.safetensors", "h.44.input_layernorm.weight": "model_00046-of-00072.safetensors", "h.44.mlp.dense_4h_to_h.bias": "model_00046-of-00072.safetensors", "h.44.mlp.dense_4h_to_h.weight": "model_00046-of-00072.safetensors", "h.44.mlp.dense_h_to_4h.bias": "model_00046-of-00072.safetensors", "h.44.mlp.dense_h_to_4h.weight": "model_00046-of-00072.safetensors", "h.44.post_attention_layernorm.bias": "model_00046-of-00072.safetensors", "h.44.post_attention_layernorm.weight": "model_00046-of-00072.safetensors", "h.44.self_attention.dense.bias": "model_00046-of-00072.safetensors", "h.44.self_attention.dense.weight": "model_00046-of-00072.safetensors", "h.44.self_attention.query_key_value.bias": "model_00046-of-00072.safetensors", "h.44.self_attention.query_key_value.weight": "model_00046-of-00072.safetensors", "h.45.input_layernorm.bias": "model_00047-of-00072.safetensors", "h.45.input_layernorm.weight": "model_00047-of-00072.safetensors", "h.45.mlp.dense_4h_to_h.bias": "model_00047-of-00072.safetensors", "h.45.mlp.dense_4h_to_h.weight": "model_00047-of-00072.safetensors", "h.45.mlp.dense_h_to_4h.bias": "model_00047-of-00072.safetensors", "h.45.mlp.dense_h_to_4h.weight": "model_00047-of-00072.safetensors", "h.45.post_attention_layernorm.bias": "model_00047-of-00072.safetensors", "h.45.post_attention_layernorm.weight": "model_00047-of-00072.safetensors", "h.45.self_attention.dense.bias": "model_00047-of-00072.safetensors", "h.45.self_attention.dense.weight": "model_00047-of-00072.safetensors", "h.45.self_attention.query_key_value.bias": "model_00047-of-00072.safetensors", "h.45.self_attention.query_key_value.weight": "model_00047-of-00072.safetensors", "h.46.input_layernorm.bias": "model_00048-of-00072.safetensors", "h.46.input_layernorm.weight": "model_00048-of-00072.safetensors", "h.46.mlp.dense_4h_to_h.bias": "model_00048-of-00072.safetensors", "h.46.mlp.dense_4h_to_h.weight": "model_00048-of-00072.safetensors", "h.46.mlp.dense_h_to_4h.bias": "model_00048-of-00072.safetensors", "h.46.mlp.dense_h_to_4h.weight": "model_00048-of-00072.safetensors", "h.46.post_attention_layernorm.bias": "model_00048-of-00072.safetensors", "h.46.post_attention_layernorm.weight": "model_00048-of-00072.safetensors", "h.46.self_attention.dense.bias": "model_00048-of-00072.safetensors", "h.46.self_attention.dense.weight": "model_00048-of-00072.safetensors", "h.46.self_attention.query_key_value.bias": "model_00048-of-00072.safetensors", "h.46.self_attention.query_key_value.weight": "model_00048-of-00072.safetensors", "h.47.input_layernorm.bias": "model_00049-of-00072.safetensors", "h.47.input_layernorm.weight": "model_00049-of-00072.safetensors", "h.47.mlp.dense_4h_to_h.bias": "model_00049-of-00072.safetensors", "h.47.mlp.dense_4h_to_h.weight": "model_00049-of-00072.safetensors", "h.47.mlp.dense_h_to_4h.bias": "model_00049-of-00072.safetensors", "h.47.mlp.dense_h_to_4h.weight": "model_00049-of-00072.safetensors", "h.47.post_attention_layernorm.bias": "model_00049-of-00072.safetensors", "h.47.post_attention_layernorm.weight": "model_00049-of-00072.safetensors", "h.47.self_attention.dense.bias": "model_00049-of-00072.safetensors", "h.47.self_attention.dense.weight": "model_00049-of-00072.safetensors", "h.47.self_attention.query_key_value.bias": "model_00049-of-00072.safetensors", "h.47.self_attention.query_key_value.weight": "model_00049-of-00072.safetensors", "h.48.input_layernorm.bias": "model_00050-of-00072.safetensors", "h.48.input_layernorm.weight": "model_00050-of-00072.safetensors", "h.48.mlp.dense_4h_to_h.bias": "model_00050-of-00072.safetensors", "h.48.mlp.dense_4h_to_h.weight": "model_00050-of-00072.safetensors", "h.48.mlp.dense_h_to_4h.bias": "model_00050-of-00072.safetensors", "h.48.mlp.dense_h_to_4h.weight": "model_00050-of-00072.safetensors", "h.48.post_attention_layernorm.bias": "model_00050-of-00072.safetensors", "h.48.post_attention_layernorm.weight": "model_00050-of-00072.safetensors", "h.48.self_attention.dense.bias": "model_00050-of-00072.safetensors", "h.48.self_attention.dense.weight": "model_00050-of-00072.safetensors", "h.48.self_attention.query_key_value.bias": "model_00050-of-00072.safetensors", "h.48.self_attention.query_key_value.weight": "model_00050-of-00072.safetensors", "h.49.input_layernorm.bias": "model_00051-of-00072.safetensors", "h.49.input_layernorm.weight": "model_00051-of-00072.safetensors", "h.49.mlp.dense_4h_to_h.bias": "model_00051-of-00072.safetensors", "h.49.mlp.dense_4h_to_h.weight": "model_00051-of-00072.safetensors", "h.49.mlp.dense_h_to_4h.bias": "model_00051-of-00072.safetensors", "h.49.mlp.dense_h_to_4h.weight": "model_00051-of-00072.safetensors", "h.49.post_attention_layernorm.bias": "model_00051-of-00072.safetensors", "h.49.post_attention_layernorm.weight": "model_00051-of-00072.safetensors", "h.49.self_attention.dense.bias": "model_00051-of-00072.safetensors", "h.49.self_attention.dense.weight": "model_00051-of-00072.safetensors", "h.49.self_attention.query_key_value.bias": "model_00051-of-00072.safetensors", "h.49.self_attention.query_key_value.weight": "model_00051-of-00072.safetensors", "h.5.input_layernorm.bias": "model_00007-of-00072.safetensors", "h.5.input_layernorm.weight": "model_00007-of-00072.safetensors", "h.5.mlp.dense_4h_to_h.bias": "model_00007-of-00072.safetensors", "h.5.mlp.dense_4h_to_h.weight": "model_00007-of-00072.safetensors", "h.5.mlp.dense_h_to_4h.bias": "model_00007-of-00072.safetensors", "h.5.mlp.dense_h_to_4h.weight": "model_00007-of-00072.safetensors", "h.5.post_attention_layernorm.bias": "model_00007-of-00072.safetensors", "h.5.post_attention_layernorm.weight": "model_00007-of-00072.safetensors", "h.5.self_attention.dense.bias": "model_00007-of-00072.safetensors", "h.5.self_attention.dense.weight": "model_00007-of-00072.safetensors", "h.5.self_attention.query_key_value.bias": "model_00007-of-00072.safetensors", "h.5.self_attention.query_key_value.weight": "model_00007-of-00072.safetensors", "h.50.input_layernorm.bias": "model_00052-of-00072.safetensors", "h.50.input_layernorm.weight": "model_00052-of-00072.safetensors", "h.50.mlp.dense_4h_to_h.bias": "model_00052-of-00072.safetensors", "h.50.mlp.dense_4h_to_h.weight": "model_00052-of-00072.safetensors", "h.50.mlp.dense_h_to_4h.bias": "model_00052-of-00072.safetensors", "h.50.mlp.dense_h_to_4h.weight": "model_00052-of-00072.safetensors", "h.50.post_attention_layernorm.bias": "model_00052-of-00072.safetensors", "h.50.post_attention_layernorm.weight": "model_00052-of-00072.safetensors", "h.50.self_attention.dense.bias": "model_00052-of-00072.safetensors", "h.50.self_attention.dense.weight": "model_00052-of-00072.safetensors", "h.50.self_attention.query_key_value.bias": "model_00052-of-00072.safetensors", "h.50.self_attention.query_key_value.weight": "model_00052-of-00072.safetensors", "h.51.input_layernorm.bias": "model_00053-of-00072.safetensors", "h.51.input_layernorm.weight": "model_00053-of-00072.safetensors", "h.51.mlp.dense_4h_to_h.bias": "model_00053-of-00072.safetensors", "h.51.mlp.dense_4h_to_h.weight": "model_00053-of-00072.safetensors", "h.51.mlp.dense_h_to_4h.bias": "model_00053-of-00072.safetensors", "h.51.mlp.dense_h_to_4h.weight": "model_00053-of-00072.safetensors", "h.51.post_attention_layernorm.bias": "model_00053-of-00072.safetensors", "h.51.post_attention_layernorm.weight": "model_00053-of-00072.safetensors", "h.51.self_attention.dense.bias": "model_00053-of-00072.safetensors", "h.51.self_attention.dense.weight": "model_00053-of-00072.safetensors", "h.51.self_attention.query_key_value.bias": "model_00053-of-00072.safetensors", "h.51.self_attention.query_key_value.weight": "model_00053-of-00072.safetensors", "h.52.input_layernorm.bias": "model_00054-of-00072.safetensors", "h.52.input_layernorm.weight": "model_00054-of-00072.safetensors", "h.52.mlp.dense_4h_to_h.bias": "model_00054-of-00072.safetensors", "h.52.mlp.dense_4h_to_h.weight": "model_00054-of-00072.safetensors", "h.52.mlp.dense_h_to_4h.bias": "model_00054-of-00072.safetensors", "h.52.mlp.dense_h_to_4h.weight": "model_00054-of-00072.safetensors", "h.52.post_attention_layernorm.bias": "model_00054-of-00072.safetensors", "h.52.post_attention_layernorm.weight": "model_00054-of-00072.safetensors", "h.52.self_attention.dense.bias": "model_00054-of-00072.safetensors", "h.52.self_attention.dense.weight": "model_00054-of-00072.safetensors", "h.52.self_attention.query_key_value.bias": "model_00054-of-00072.safetensors", "h.52.self_attention.query_key_value.weight": "model_00054-of-00072.safetensors", "h.53.input_layernorm.bias": "model_00055-of-00072.safetensors", "h.53.input_layernorm.weight": "model_00055-of-00072.safetensors", "h.53.mlp.dense_4h_to_h.bias": "model_00055-of-00072.safetensors", "h.53.mlp.dense_4h_to_h.weight": "model_00055-of-00072.safetensors", "h.53.mlp.dense_h_to_4h.bias": "model_00055-of-00072.safetensors", "h.53.mlp.dense_h_to_4h.weight": "model_00055-of-00072.safetensors", "h.53.post_attention_layernorm.bias": "model_00055-of-00072.safetensors", "h.53.post_attention_layernorm.weight": "model_00055-of-00072.safetensors", "h.53.self_attention.dense.bias": "model_00055-of-00072.safetensors", "h.53.self_attention.dense.weight": "model_00055-of-00072.safetensors", "h.53.self_attention.query_key_value.bias": "model_00055-of-00072.safetensors", "h.53.self_attention.query_key_value.weight": "model_00055-of-00072.safetensors", "h.54.input_layernorm.bias": "model_00056-of-00072.safetensors", "h.54.input_layernorm.weight": "model_00056-of-00072.safetensors", "h.54.mlp.dense_4h_to_h.bias": "model_00056-of-00072.safetensors", "h.54.mlp.dense_4h_to_h.weight": "model_00056-of-00072.safetensors", "h.54.mlp.dense_h_to_4h.bias": "model_00056-of-00072.safetensors", "h.54.mlp.dense_h_to_4h.weight": "model_00056-of-00072.safetensors", "h.54.post_attention_layernorm.bias": "model_00056-of-00072.safetensors", "h.54.post_attention_layernorm.weight": "model_00056-of-00072.safetensors", "h.54.self_attention.dense.bias": "model_00056-of-00072.safetensors", "h.54.self_attention.dense.weight": "model_00056-of-00072.safetensors", "h.54.self_attention.query_key_value.bias": "model_00056-of-00072.safetensors", "h.54.self_attention.query_key_value.weight": "model_00056-of-00072.safetensors", "h.55.input_layernorm.bias": "model_00057-of-00072.safetensors", "h.55.input_layernorm.weight": "model_00057-of-00072.safetensors", "h.55.mlp.dense_4h_to_h.bias": "model_00057-of-00072.safetensors", "h.55.mlp.dense_4h_to_h.weight": "model_00057-of-00072.safetensors", "h.55.mlp.dense_h_to_4h.bias": "model_00057-of-00072.safetensors", "h.55.mlp.dense_h_to_4h.weight": "model_00057-of-00072.safetensors", "h.55.post_attention_layernorm.bias": "model_00057-of-00072.safetensors", "h.55.post_attention_layernorm.weight": "model_00057-of-00072.safetensors", "h.55.self_attention.dense.bias": "model_00057-of-00072.safetensors", "h.55.self_attention.dense.weight": "model_00057-of-00072.safetensors", "h.55.self_attention.query_key_value.bias": "model_00057-of-00072.safetensors", "h.55.self_attention.query_key_value.weight": "model_00057-of-00072.safetensors", "h.56.input_layernorm.bias": "model_00058-of-00072.safetensors", "h.56.input_layernorm.weight": "model_00058-of-00072.safetensors", "h.56.mlp.dense_4h_to_h.bias": "model_00058-of-00072.safetensors", "h.56.mlp.dense_4h_to_h.weight": "model_00058-of-00072.safetensors", "h.56.mlp.dense_h_to_4h.bias": "model_00058-of-00072.safetensors", "h.56.mlp.dense_h_to_4h.weight": "model_00058-of-00072.safetensors", "h.56.post_attention_layernorm.bias": "model_00058-of-00072.safetensors", "h.56.post_attention_layernorm.weight": "model_00058-of-00072.safetensors", "h.56.self_attention.dense.bias": "model_00058-of-00072.safetensors", "h.56.self_attention.dense.weight": "model_00058-of-00072.safetensors", "h.56.self_attention.query_key_value.bias": "model_00058-of-00072.safetensors", "h.56.self_attention.query_key_value.weight": "model_00058-of-00072.safetensors", "h.57.input_layernorm.bias": "model_00059-of-00072.safetensors", "h.57.input_layernorm.weight": "model_00059-of-00072.safetensors", "h.57.mlp.dense_4h_to_h.bias": "model_00059-of-00072.safetensors", "h.57.mlp.dense_4h_to_h.weight": "model_00059-of-00072.safetensors", "h.57.mlp.dense_h_to_4h.bias": "model_00059-of-00072.safetensors", "h.57.mlp.dense_h_to_4h.weight": "model_00059-of-00072.safetensors", "h.57.post_attention_layernorm.bias": "model_00059-of-00072.safetensors", "h.57.post_attention_layernorm.weight": "model_00059-of-00072.safetensors", "h.57.self_attention.dense.bias": "model_00059-of-00072.safetensors", "h.57.self_attention.dense.weight": "model_00059-of-00072.safetensors", "h.57.self_attention.query_key_value.bias": "model_00059-of-00072.safetensors", "h.57.self_attention.query_key_value.weight": "model_00059-of-00072.safetensors", "h.58.input_layernorm.bias": "model_00060-of-00072.safetensors", "h.58.input_layernorm.weight": "model_00060-of-00072.safetensors", "h.58.mlp.dense_4h_to_h.bias": "model_00060-of-00072.safetensors", "h.58.mlp.dense_4h_to_h.weight": "model_00060-of-00072.safetensors", "h.58.mlp.dense_h_to_4h.bias": "model_00060-of-00072.safetensors", "h.58.mlp.dense_h_to_4h.weight": "model_00060-of-00072.safetensors", "h.58.post_attention_layernorm.bias": "model_00060-of-00072.safetensors", "h.58.post_attention_layernorm.weight": "model_00060-of-00072.safetensors", "h.58.self_attention.dense.bias": "model_00060-of-00072.safetensors", "h.58.self_attention.dense.weight": "model_00060-of-00072.safetensors", "h.58.self_attention.query_key_value.bias": "model_00060-of-00072.safetensors", "h.58.self_attention.query_key_value.weight": "model_00060-of-00072.safetensors", "h.59.input_layernorm.bias": "model_00061-of-00072.safetensors", "h.59.input_layernorm.weight": "model_00061-of-00072.safetensors", "h.59.mlp.dense_4h_to_h.bias": "model_00061-of-00072.safetensors", "h.59.mlp.dense_4h_to_h.weight": "model_00061-of-00072.safetensors", "h.59.mlp.dense_h_to_4h.bias": "model_00061-of-00072.safetensors", "h.59.mlp.dense_h_to_4h.weight": "model_00061-of-00072.safetensors", "h.59.post_attention_layernorm.bias": "model_00061-of-00072.safetensors", "h.59.post_attention_layernorm.weight": "model_00061-of-00072.safetensors", "h.59.self_attention.dense.bias": "model_00061-of-00072.safetensors", "h.59.self_attention.dense.weight": "model_00061-of-00072.safetensors", "h.59.self_attention.query_key_value.bias": "model_00061-of-00072.safetensors", "h.59.self_attention.query_key_value.weight": "model_00061-of-00072.safetensors", "h.6.input_layernorm.bias": "model_00008-of-00072.safetensors", "h.6.input_layernorm.weight": "model_00008-of-00072.safetensors", "h.6.mlp.dense_4h_to_h.bias": "model_00008-of-00072.safetensors", "h.6.mlp.dense_4h_to_h.weight": "model_00008-of-00072.safetensors", "h.6.mlp.dense_h_to_4h.bias": "model_00008-of-00072.safetensors", "h.6.mlp.dense_h_to_4h.weight": "model_00008-of-00072.safetensors", "h.6.post_attention_layernorm.bias": "model_00008-of-00072.safetensors", "h.6.post_attention_layernorm.weight": "model_00008-of-00072.safetensors", "h.6.self_attention.dense.bias": "model_00008-of-00072.safetensors", "h.6.self_attention.dense.weight": "model_00008-of-00072.safetensors", "h.6.self_attention.query_key_value.bias": "model_00008-of-00072.safetensors", "h.6.self_attention.query_key_value.weight": "model_00008-of-00072.safetensors", "h.60.input_layernorm.bias": "model_00062-of-00072.safetensors", "h.60.input_layernorm.weight": "model_00062-of-00072.safetensors", "h.60.mlp.dense_4h_to_h.bias": "model_00062-of-00072.safetensors", "h.60.mlp.dense_4h_to_h.weight": "model_00062-of-00072.safetensors", "h.60.mlp.dense_h_to_4h.bias": "model_00062-of-00072.safetensors", "h.60.mlp.dense_h_to_4h.weight": "model_00062-of-00072.safetensors", "h.60.post_attention_layernorm.bias": "model_00062-of-00072.safetensors", "h.60.post_attention_layernorm.weight": "model_00062-of-00072.safetensors", "h.60.self_attention.dense.bias": "model_00062-of-00072.safetensors", "h.60.self_attention.dense.weight": "model_00062-of-00072.safetensors", "h.60.self_attention.query_key_value.bias": "model_00062-of-00072.safetensors", "h.60.self_attention.query_key_value.weight": "model_00062-of-00072.safetensors", "h.61.input_layernorm.bias": "model_00063-of-00072.safetensors", "h.61.input_layernorm.weight": "model_00063-of-00072.safetensors", "h.61.mlp.dense_4h_to_h.bias": "model_00063-of-00072.safetensors", "h.61.mlp.dense_4h_to_h.weight": "model_00063-of-00072.safetensors", "h.61.mlp.dense_h_to_4h.bias": "model_00063-of-00072.safetensors", "h.61.mlp.dense_h_to_4h.weight": "model_00063-of-00072.safetensors", "h.61.post_attention_layernorm.bias": "model_00063-of-00072.safetensors", "h.61.post_attention_layernorm.weight": "model_00063-of-00072.safetensors", "h.61.self_attention.dense.bias": "model_00063-of-00072.safetensors", "h.61.self_attention.dense.weight": "model_00063-of-00072.safetensors", "h.61.self_attention.query_key_value.bias": "model_00063-of-00072.safetensors", "h.61.self_attention.query_key_value.weight": "model_00063-of-00072.safetensors", "h.62.input_layernorm.bias": "model_00064-of-00072.safetensors", "h.62.input_layernorm.weight": "model_00064-of-00072.safetensors", "h.62.mlp.dense_4h_to_h.bias": "model_00064-of-00072.safetensors", "h.62.mlp.dense_4h_to_h.weight": "model_00064-of-00072.safetensors", "h.62.mlp.dense_h_to_4h.bias": "model_00064-of-00072.safetensors", "h.62.mlp.dense_h_to_4h.weight": "model_00064-of-00072.safetensors", "h.62.post_attention_layernorm.bias": "model_00064-of-00072.safetensors", "h.62.post_attention_layernorm.weight": "model_00064-of-00072.safetensors", "h.62.self_attention.dense.bias": "model_00064-of-00072.safetensors", "h.62.self_attention.dense.weight": "model_00064-of-00072.safetensors", "h.62.self_attention.query_key_value.bias": "model_00064-of-00072.safetensors", "h.62.self_attention.query_key_value.weight": "model_00064-of-00072.safetensors", "h.63.input_layernorm.bias": "model_00065-of-00072.safetensors", "h.63.input_layernorm.weight": "model_00065-of-00072.safetensors", "h.63.mlp.dense_4h_to_h.bias": "model_00065-of-00072.safetensors", "h.63.mlp.dense_4h_to_h.weight": "model_00065-of-00072.safetensors", "h.63.mlp.dense_h_to_4h.bias": "model_00065-of-00072.safetensors", "h.63.mlp.dense_h_to_4h.weight": "model_00065-of-00072.safetensors", "h.63.post_attention_layernorm.bias": "model_00065-of-00072.safetensors", "h.63.post_attention_layernorm.weight": "model_00065-of-00072.safetensors", "h.63.self_attention.dense.bias": "model_00065-of-00072.safetensors", "h.63.self_attention.dense.weight": "model_00065-of-00072.safetensors", "h.63.self_attention.query_key_value.bias": "model_00065-of-00072.safetensors", "h.63.self_attention.query_key_value.weight": "model_00065-of-00072.safetensors", "h.64.input_layernorm.bias": "model_00066-of-00072.safetensors", "h.64.input_layernorm.weight": "model_00066-of-00072.safetensors", "h.64.mlp.dense_4h_to_h.bias": "model_00066-of-00072.safetensors", "h.64.mlp.dense_4h_to_h.weight": "model_00066-of-00072.safetensors", "h.64.mlp.dense_h_to_4h.bias": "model_00066-of-00072.safetensors", "h.64.mlp.dense_h_to_4h.weight": "model_00066-of-00072.safetensors", "h.64.post_attention_layernorm.bias": "model_00066-of-00072.safetensors", "h.64.post_attention_layernorm.weight": "model_00066-of-00072.safetensors", "h.64.self_attention.dense.bias": "model_00066-of-00072.safetensors", "h.64.self_attention.dense.weight": "model_00066-of-00072.safetensors", "h.64.self_attention.query_key_value.bias": "model_00066-of-00072.safetensors", "h.64.self_attention.query_key_value.weight": "model_00066-of-00072.safetensors", "h.65.input_layernorm.bias": "model_00067-of-00072.safetensors", "h.65.input_layernorm.weight": "model_00067-of-00072.safetensors", "h.65.mlp.dense_4h_to_h.bias": "model_00067-of-00072.safetensors", "h.65.mlp.dense_4h_to_h.weight": "model_00067-of-00072.safetensors", "h.65.mlp.dense_h_to_4h.bias": "model_00067-of-00072.safetensors", "h.65.mlp.dense_h_to_4h.weight": "model_00067-of-00072.safetensors", "h.65.post_attention_layernorm.bias": "model_00067-of-00072.safetensors", "h.65.post_attention_layernorm.weight": "model_00067-of-00072.safetensors", "h.65.self_attention.dense.bias": "model_00067-of-00072.safetensors", "h.65.self_attention.dense.weight": "model_00067-of-00072.safetensors", "h.65.self_attention.query_key_value.bias": "model_00067-of-00072.safetensors", "h.65.self_attention.query_key_value.weight": "model_00067-of-00072.safetensors", "h.66.input_layernorm.bias": "model_00068-of-00072.safetensors", "h.66.input_layernorm.weight": "model_00068-of-00072.safetensors", "h.66.mlp.dense_4h_to_h.bias": "model_00068-of-00072.safetensors", "h.66.mlp.dense_4h_to_h.weight": "model_00068-of-00072.safetensors", "h.66.mlp.dense_h_to_4h.bias": "model_00068-of-00072.safetensors", "h.66.mlp.dense_h_to_4h.weight": "model_00068-of-00072.safetensors", "h.66.post_attention_layernorm.bias": "model_00068-of-00072.safetensors", "h.66.post_attention_layernorm.weight": "model_00068-of-00072.safetensors", "h.66.self_attention.dense.bias": "model_00068-of-00072.safetensors", "h.66.self_attention.dense.weight": "model_00068-of-00072.safetensors", "h.66.self_attention.query_key_value.bias": "model_00068-of-00072.safetensors", "h.66.self_attention.query_key_value.weight": "model_00068-of-00072.safetensors", "h.67.input_layernorm.bias": "model_00069-of-00072.safetensors", "h.67.input_layernorm.weight": "model_00069-of-00072.safetensors", "h.67.mlp.dense_4h_to_h.bias": "model_00069-of-00072.safetensors", "h.67.mlp.dense_4h_to_h.weight": "model_00069-of-00072.safetensors", "h.67.mlp.dense_h_to_4h.bias": "model_00069-of-00072.safetensors", "h.67.mlp.dense_h_to_4h.weight": "model_00069-of-00072.safetensors", "h.67.post_attention_layernorm.bias": "model_00069-of-00072.safetensors", "h.67.post_attention_layernorm.weight": "model_00069-of-00072.safetensors", "h.67.self_attention.dense.bias": "model_00069-of-00072.safetensors", "h.67.self_attention.dense.weight": "model_00069-of-00072.safetensors", "h.67.self_attention.query_key_value.bias": "model_00069-of-00072.safetensors", "h.67.self_attention.query_key_value.weight": "model_00069-of-00072.safetensors", "h.68.input_layernorm.bias": "model_00070-of-00072.safetensors", "h.68.input_layernorm.weight": "model_00070-of-00072.safetensors", "h.68.mlp.dense_4h_to_h.bias": "model_00070-of-00072.safetensors", "h.68.mlp.dense_4h_to_h.weight": "model_00070-of-00072.safetensors", "h.68.mlp.dense_h_to_4h.bias": "model_00070-of-00072.safetensors", "h.68.mlp.dense_h_to_4h.weight": "model_00070-of-00072.safetensors", "h.68.post_attention_layernorm.bias": "model_00070-of-00072.safetensors", "h.68.post_attention_layernorm.weight": "model_00070-of-00072.safetensors", "h.68.self_attention.dense.bias": "model_00070-of-00072.safetensors", "h.68.self_attention.dense.weight": "model_00070-of-00072.safetensors", "h.68.self_attention.query_key_value.bias": "model_00070-of-00072.safetensors", "h.68.self_attention.query_key_value.weight": "model_00070-of-00072.safetensors", "h.69.input_layernorm.bias": "model_00071-of-00072.safetensors", "h.69.input_layernorm.weight": "model_00071-of-00072.safetensors", "h.69.mlp.dense_4h_to_h.bias": "model_00071-of-00072.safetensors", "h.69.mlp.dense_4h_to_h.weight": "model_00071-of-00072.safetensors", "h.69.mlp.dense_h_to_4h.bias": "model_00071-of-00072.safetensors", "h.69.mlp.dense_h_to_4h.weight": "model_00071-of-00072.safetensors", "h.69.post_attention_layernorm.bias": "model_00071-of-00072.safetensors", "h.69.post_attention_layernorm.weight": "model_00071-of-00072.safetensors", "h.69.self_attention.dense.bias": "model_00071-of-00072.safetensors", "h.69.self_attention.dense.weight": "model_00071-of-00072.safetensors", "h.69.self_attention.query_key_value.bias": "model_00071-of-00072.safetensors", "h.69.self_attention.query_key_value.weight": "model_00071-of-00072.safetensors", "h.7.input_layernorm.bias": "model_00009-of-00072.safetensors", "h.7.input_layernorm.weight": "model_00009-of-00072.safetensors", "h.7.mlp.dense_4h_to_h.bias": "model_00009-of-00072.safetensors", "h.7.mlp.dense_4h_to_h.weight": "model_00009-of-00072.safetensors", "h.7.mlp.dense_h_to_4h.bias": "model_00009-of-00072.safetensors", "h.7.mlp.dense_h_to_4h.weight": "model_00009-of-00072.safetensors", "h.7.post_attention_layernorm.bias": "model_00009-of-00072.safetensors", "h.7.post_attention_layernorm.weight": "model_00009-of-00072.safetensors", "h.7.self_attention.dense.bias": "model_00009-of-00072.safetensors", "h.7.self_attention.dense.weight": "model_00009-of-00072.safetensors", "h.7.self_attention.query_key_value.bias": "model_00009-of-00072.safetensors", "h.7.self_attention.query_key_value.weight": "model_00009-of-00072.safetensors", "h.8.input_layernorm.bias": "model_00010-of-00072.safetensors", "h.8.input_layernorm.weight": "model_00010-of-00072.safetensors", "h.8.mlp.dense_4h_to_h.bias": "model_00010-of-00072.safetensors", "h.8.mlp.dense_4h_to_h.weight": "model_00010-of-00072.safetensors", "h.8.mlp.dense_h_to_4h.bias": "model_00010-of-00072.safetensors", "h.8.mlp.dense_h_to_4h.weight": "model_00010-of-00072.safetensors", "h.8.post_attention_layernorm.bias": "model_00010-of-00072.safetensors", "h.8.post_attention_layernorm.weight": "model_00010-of-00072.safetensors", "h.8.self_attention.dense.bias": "model_00010-of-00072.safetensors", "h.8.self_attention.dense.weight": "model_00010-of-00072.safetensors", "h.8.self_attention.query_key_value.bias": "model_00010-of-00072.safetensors", "h.8.self_attention.query_key_value.weight": "model_00010-of-00072.safetensors", "h.9.input_layernorm.bias": "model_00011-of-00072.safetensors", "h.9.input_layernorm.weight": "model_00011-of-00072.safetensors", "h.9.mlp.dense_4h_to_h.bias": "model_00011-of-00072.safetensors", "h.9.mlp.dense_4h_to_h.weight": "model_00011-of-00072.safetensors", "h.9.mlp.dense_h_to_4h.bias": "model_00011-of-00072.safetensors", "h.9.mlp.dense_h_to_4h.weight": "model_00011-of-00072.safetensors", "h.9.post_attention_layernorm.bias": "model_00011-of-00072.safetensors", "h.9.post_attention_layernorm.weight": "model_00011-of-00072.safetensors", "h.9.self_attention.dense.bias": "model_00011-of-00072.safetensors", "h.9.self_attention.dense.weight": "model_00011-of-00072.safetensors", "h.9.self_attention.query_key_value.bias": "model_00011-of-00072.safetensors", "h.9.self_attention.query_key_value.weight": "model_00011-of-00072.safetensors", "ln_f.bias": "model_00072-of-00072.safetensors", "ln_f.weight": "model_00072-of-00072.safetensors", "word_embeddings.weight": "model_00001-of-00072.safetensors", "word_embeddings_layernorm.bias": "model_00001-of-00072.safetensors", "word_embeddings_layernorm.weight": "model_00001-of-00072.safetensors"}}
\ No newline at end of file
diff --git a/model_00001-of-00072.safetensors b/model_00001-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bf1a7a32662d4ec7562f7fb78544942e84af517f
--- /dev/null
+++ b/model_00001-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e30d7fb951f7c48f201cc140057f8ae25c2d71bfb316549303211337ffe1dcab
+size 7193289054
diff --git a/model_00002-of-00072.safetensors b/model_00002-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d39f17597479a9f2f1f39141b0c4b46cc4544467
--- /dev/null
+++ b/model_00002-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1dade261e7004691ef59cd95e73ca05f3fc7169a3f81dd97e4be3205a639607
+size 4932875534
diff --git a/model_00003-of-00072.safetensors b/model_00003-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1975481809dcba40458d303f507c26dbf442f764
--- /dev/null
+++ b/model_00003-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77f39850dc70a101c3d04114de061230a5c4452ecd4c9a457ef537fae029760d
+size 4932875534
diff --git a/model_00004-of-00072.safetensors b/model_00004-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2a18417cc3aba2903fd61717c7d8f7b28adf1bd3
--- /dev/null
+++ b/model_00004-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd182bc08fab3d1b5f5972a7c03bcdb43ea23027e12045fe11cd152942226192
+size 4932875534
diff --git a/model_00005-of-00072.safetensors b/model_00005-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cc7b3e3125e69da370f4a35b1ebbe238d35ea3ab
--- /dev/null
+++ b/model_00005-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b955b02d4f3470c2bbe37471323267ec48bc8d07bd63a7216020c08afe3da0b4
+size 4932875534
diff --git a/model_00006-of-00072.safetensors b/model_00006-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0a5385b00302f70d9b4f5c3aeadfac19f7c8905c
--- /dev/null
+++ b/model_00006-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24a3eb73e75a703b6a8e56dbe0846219eadfcc8b836fc80d63b2989858314a6c
+size 4932875534
diff --git a/model_00007-of-00072.safetensors b/model_00007-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4d0909856175ef1f686bd3d0f4358e9e3a039788
--- /dev/null
+++ b/model_00007-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:346e05c2b092ef24a23327b404dd1de955983309b1c1f256f8bf838a32dfc39e
+size 4932875534
diff --git a/model_00008-of-00072.safetensors b/model_00008-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1aa59aec6f0378a9f56b1749778416b6293927af
--- /dev/null
+++ b/model_00008-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f61d0887d223777afc750c9c15e4fefa615d8e343888a8b61702f441ea2e7a4
+size 4932875534
diff --git a/model_00009-of-00072.safetensors b/model_00009-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5051a50f807951f550afa9e25eb2f4f17060ad23
--- /dev/null
+++ b/model_00009-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:403e479af3fb1e7f9d549803e3c304adec15835d85ae5ed90e3d98e7c778db75
+size 4932875534
diff --git a/model_00010-of-00072.safetensors b/model_00010-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..684ae051610d76ffd5b8ae9cb7b0e85e7ebb6254
--- /dev/null
+++ b/model_00010-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1094b467f2b74bc8caa7adcd1fb6bbf209f47c5974dbdaadf14737dce3459ce6
+size 4932875534
diff --git a/model_00011-of-00072.safetensors b/model_00011-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ae97b47375b65b88afbd533c3ebf2fff55d47d35
--- /dev/null
+++ b/model_00011-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06b89d1804d7b8ece3956d7ccf3f90613f3575f1cab658ee4eedbd12cedc08fc
+size 4932875534
diff --git a/model_00012-of-00072.safetensors b/model_00012-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..23c7713586086e4b50b672e9a317d650c3e3b7b2
--- /dev/null
+++ b/model_00012-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f63d86cd387e2d0965161868a77201be50bc3ef6fdcbf011ab3497fdb102407
+size 4932875550
diff --git a/model_00013-of-00072.safetensors b/model_00013-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..768852690bede476caea0573a97ac5809bf3ab6b
--- /dev/null
+++ b/model_00013-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:400c454c7b3dc0314469aa9d68a2c54e9f780ee6cde5d4dfd43947b845591a80
+size 4932875550
diff --git a/model_00014-of-00072.safetensors b/model_00014-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..68615e085221e650ff18581b80f2bf0a16c7b65f
--- /dev/null
+++ b/model_00014-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa2a34bef5ef9db83cd26ca968d77e0bc6e26c7ab0dca578923cb26dabcb4ea2
+size 4932875550
diff --git a/model_00015-of-00072.safetensors b/model_00015-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..517c8b1c32d041d73ac5b5083914c65b39fde925
--- /dev/null
+++ b/model_00015-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de6920421bacb945ead13bfaa431cf24b26f4acef2b87fc250e5bc4753282d19
+size 4932875550
diff --git a/model_00016-of-00072.safetensors b/model_00016-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49cb68f0afac849d870a195d4b7156b9c1cab6db
--- /dev/null
+++ b/model_00016-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:397b3496f7b73d232f4db9fe3acb0d66def5a7968c445843ae114c8bf4f04d82
+size 4932875550
diff --git a/model_00017-of-00072.safetensors b/model_00017-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5379add7367b752200174f97fa45641097d31e24
--- /dev/null
+++ b/model_00017-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ff46db6a5bfe99c0cda9b5c595637a25ae2613278b11babe121e91be57cd73c
+size 4932875550
diff --git a/model_00018-of-00072.safetensors b/model_00018-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2b99e0faa4dfd7b9dca965d4913d0ad0b3440467
--- /dev/null
+++ b/model_00018-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd60d1189874aad85021243355a81dfef757e7fd03081b87a8f0a6e600d43a76
+size 4932875550
diff --git a/model_00019-of-00072.safetensors b/model_00019-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1191c1e79c95ae33d448b7e7173f9c4c1d98d96e
--- /dev/null
+++ b/model_00019-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bed55ceba0bbe65c138f920fb65bed307df88a57c807102f232e4a9c4ca9ed8
+size 4932875550
diff --git a/model_00020-of-00072.safetensors b/model_00020-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4803675523b346975d96954b7ec9a18cc92313d2
--- /dev/null
+++ b/model_00020-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d419eb379949cab4c4685ef5eed2a8ad7b49ffb77f3671175642fc3683b9cee
+size 4932875550
diff --git a/model_00021-of-00072.safetensors b/model_00021-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..82dc943d9e5621695c9014d6d679492fb32b3973
--- /dev/null
+++ b/model_00021-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef5aa0fcf056c86f421e228434b62e40e6bd3cbc96c18844ba69db7590f4d934
+size 4932875550
diff --git a/model_00022-of-00072.safetensors b/model_00022-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..68b79ceb046fe55638157c3fc62b7507f43918c5
--- /dev/null
+++ b/model_00022-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f54e50e576f21f299b952b312eac3476f3bc57231f53e2b4ab256a7395f1525
+size 4932875550
diff --git a/model_00023-of-00072.safetensors b/model_00023-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..724e3b16013c81227f359f6be53899938f1b03a4
--- /dev/null
+++ b/model_00023-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54b57daa846238211b8adb64bc9e28a28c07e20ad503533b18f4d66ed2aa86f5
+size 4932875550
diff --git a/model_00024-of-00072.safetensors b/model_00024-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..23f15b4a118e4d88a2100ef0fb70409fd003d15c
--- /dev/null
+++ b/model_00024-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56de07d68a73550b82f02ff42b4ba87e1cf35881ceab586e9759bff06c641e5c
+size 4932875550
diff --git a/model_00025-of-00072.safetensors b/model_00025-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f3c37b4f86256bf8241b013d2236d7c415f048c1
--- /dev/null
+++ b/model_00025-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8aaa0e2f2d148cd2225cdfcffb71848faf27b4149c12a33637f175b0f6d9c15
+size 4932875550
diff --git a/model_00026-of-00072.safetensors b/model_00026-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dc0e9411f381ad1e305b292b5e6e6fbcc6c001c9
--- /dev/null
+++ b/model_00026-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f58f844b32d792a2d67090193e9e3b4f00ff6ad084da63a3e37dd9819149f83e
+size 4932875550
diff --git a/model_00027-of-00072.safetensors b/model_00027-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5de9c0b4f565863ed11c83eede02afd486929890
--- /dev/null
+++ b/model_00027-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eaec09a779a8c1d1984fdf9121a66a47255eca358b030e11dbe616b36e8c7424
+size 4932875550
diff --git a/model_00028-of-00072.safetensors b/model_00028-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..38619fbeabb91e7f3ec5d8ef20c65abf7a3a34bc
--- /dev/null
+++ b/model_00028-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3620ea8d35be2c0695b01415e8a5103d879f13d8f937ddc8c99d95b90c7d1d86
+size 4932875550
diff --git a/model_00029-of-00072.safetensors b/model_00029-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4eb6b2d2bb12c4658d24eef7bdd40cc22b3c943f
--- /dev/null
+++ b/model_00029-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c62e0cb19cd2ba0aaee5aa6bcd4bc54a1ea025f3e8e7f367e3f1011e9e9db24d
+size 4932875550
diff --git a/model_00030-of-00072.safetensors b/model_00030-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..472458e52e456e9f83fdb392a25be556dcc02686
--- /dev/null
+++ b/model_00030-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9ab4ff64c2f10634a612c7bd229d3e57320303700744633f2e870ac5158f3ba
+size 4932875550
diff --git a/model_00031-of-00072.safetensors b/model_00031-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a90890528e56c0e51b1f1030e2330a8a2270c1e6
--- /dev/null
+++ b/model_00031-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f34185a577ae1079cf2adf61b93fc6294926e61bbdef2652ba77d358bfda9266
+size 4932875550
diff --git a/model_00032-of-00072.safetensors b/model_00032-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f0d9de2509c6dbb4309459d5cb1d70f393541d5e
--- /dev/null
+++ b/model_00032-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5106756852e637b27e4d3bf381e3d9a0c038015cfb1f5da3ac22c3685c9192f9
+size 4932875550
diff --git a/model_00033-of-00072.safetensors b/model_00033-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e4d737e4a6c821cc769f0d1f619ca1fc4768db9b
--- /dev/null
+++ b/model_00033-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed9afd6c58e3ddef8107e11073f568fb0509fa9a4c8785fef43cd0824651c41c
+size 4932875550
diff --git a/model_00034-of-00072.safetensors b/model_00034-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..91557d133e4e9304200319cb1f5f20fadd33d903
--- /dev/null
+++ b/model_00034-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19754650bafb3dae6e9b45e7abfbeb5ca34debf45f6c63c3b37348aaeb5f3fe6
+size 4932875550
diff --git a/model_00035-of-00072.safetensors b/model_00035-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1a72e1bef810cdc809273dfe1a19a864254c33e8
--- /dev/null
+++ b/model_00035-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d50afb042b8a0b173190717d5ef18fc1d0f0aae32ad357bd583c6c6822c2f40a
+size 4932875550
diff --git a/model_00036-of-00072.safetensors b/model_00036-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ddc9534c0b1525018d39a4fb3fdcf3cc1ee01826
--- /dev/null
+++ b/model_00036-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:947d2a9b94a856c2f7e5cb691e9d29170e6a3884edaa6b03cc5004274551fb30
+size 4932875550
diff --git a/model_00037-of-00072.safetensors b/model_00037-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1b03b059d69e256d884ff08495b3b2b829eb0133
--- /dev/null
+++ b/model_00037-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62cfec0eb6ac041c5f31a578757b73d8b4448b4a7aa60ad642f870b36ff0718a
+size 4932875550
diff --git a/model_00038-of-00072.safetensors b/model_00038-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f3e16b55a91c65b1344d428706132dcd1e2bcd84
--- /dev/null
+++ b/model_00038-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc2a5b916cad450ea2a9a45f621387fbd3a8bdc365ddf117e225482c47b7537f
+size 4932875550
diff --git a/model_00039-of-00072.safetensors b/model_00039-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..29b4e19bd98ad703483718fc0b642f003c4f905e
--- /dev/null
+++ b/model_00039-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9d87b0681c7dbdaeafe1a245f90ae0ee8b5452ce55cd6b74c8cf765075e2634
+size 4932875550
diff --git a/model_00040-of-00072.safetensors b/model_00040-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7830ed63ab5ac88fc3822faac4b02cea0da3f64d
--- /dev/null
+++ b/model_00040-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b428a6c9ee2bc4d9db55f8408f2e2fcb3bba35c887105088a1a0c58c059a875e
+size 4932875550
diff --git a/model_00041-of-00072.safetensors b/model_00041-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..34438996df41a1e682e064b70c1ae2b81100cd35
--- /dev/null
+++ b/model_00041-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e63a5ecea5b30a2085bd0e6c41e19c8011b267312800d03b953fb5a09cc5e598
+size 4932875550
diff --git a/model_00042-of-00072.safetensors b/model_00042-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f784fb90dcda2bb01fba69256f32785478d5af9d
--- /dev/null
+++ b/model_00042-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4406387ab67c44a857f2c6127d57ebc4c0e5e2a19365e72f6c83026c505d25fe
+size 4932875550
diff --git a/model_00043-of-00072.safetensors b/model_00043-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9ed2fdfa5e94fdeb7bb2a6599566f19f8b687159
--- /dev/null
+++ b/model_00043-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77ddbfff64d58f5b8f99558e25774a555037b7017a3c421cd7b7ff5689391ca7
+size 4932875550
diff --git a/model_00044-of-00072.safetensors b/model_00044-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1986dad95e6a18380f4053defbd60753373c0990
--- /dev/null
+++ b/model_00044-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf0b7898788424988074ceb40d17d5f39b038ac3817eff3f39350e6d74e62b06
+size 4932875550
diff --git a/model_00045-of-00072.safetensors b/model_00045-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c5b9265ba394721a9b6e5035623587d4e771683d
--- /dev/null
+++ b/model_00045-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e965108f635a7e813fcc7e9196e748f70049f53289869b6d48770c4d779226eb
+size 4932875550
diff --git a/model_00046-of-00072.safetensors b/model_00046-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c63fabc6944b74cca772d1d19cc9d52c91d43ec4
--- /dev/null
+++ b/model_00046-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c752ef46110a95957de506e7569be24ec5f90d833f61705e20764687bf8fb258
+size 4932875550
diff --git a/model_00047-of-00072.safetensors b/model_00047-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b62c1e3035d813ac05cd0b06312fc3f65cf51d99
--- /dev/null
+++ b/model_00047-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2516bb84281f64429e13b31947308e6a16aba5130ca2dc20a7880a3decd6f084
+size 4932875550
diff --git a/model_00048-of-00072.safetensors b/model_00048-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ca8d44716a488db6792e130fc3cfcbd31fd40c5e
--- /dev/null
+++ b/model_00048-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57e759b81e347209a468d37577194b2af9a45ac3093f8717a658df121d136093
+size 4932875550
diff --git a/model_00049-of-00072.safetensors b/model_00049-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bcacdbec6cf3d48f84ec00a00b6933e94dbde5db
--- /dev/null
+++ b/model_00049-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1607bae51d277fbff087d75eaa4901facfaf157cff44e3cceb00cb01d2d44cc8
+size 4932875550
diff --git a/model_00050-of-00072.safetensors b/model_00050-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5bdc4764793dd61b210d09013feafd42774b4068
--- /dev/null
+++ b/model_00050-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5b387bb8c7431e87421842a8dd0b288df537d45b3774be00b6972cc804f5eb5
+size 4932875550
diff --git a/model_00051-of-00072.safetensors b/model_00051-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..25da63a34d6493fd5347e4dc2f70a7abcb4bf66e
--- /dev/null
+++ b/model_00051-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7eb036ba885acca56b043a00f732acebd463678857c93104bc468aad6529d336
+size 4932875550
diff --git a/model_00052-of-00072.safetensors b/model_00052-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9d04c8c227b43a570483aea79072dc2feb113109
--- /dev/null
+++ b/model_00052-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8ff64a328da142f1187c700afaafe109c9609d15e72a6095f5e1a091f019988
+size 4932875550
diff --git a/model_00053-of-00072.safetensors b/model_00053-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..153e208c6a275c824128d0c147fc59f39b9df609
--- /dev/null
+++ b/model_00053-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:558acf44a11e7629f595a1075e75c55cc6fb6ff43bf0ead566e4440654d68e6b
+size 4932875550
diff --git a/model_00054-of-00072.safetensors b/model_00054-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b4377fb4b7537b04bc7cf9b64c2cc15547a8d265
--- /dev/null
+++ b/model_00054-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e72ac9c116a6013ea2fcce932cb4c00c64b31f40d795177928515947e5a1abc8
+size 4932875550
diff --git a/model_00055-of-00072.safetensors b/model_00055-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b7806aa1db1c568b02cb06d19c14060d45fc178f
--- /dev/null
+++ b/model_00055-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcfe8c127dd63c30c2813cf4955699b149a683ad260d9ffafeaf492f1ebf99ac
+size 4932875550
diff --git a/model_00056-of-00072.safetensors b/model_00056-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..46208c12b0be054945f866a49708a9fca6d9690d
--- /dev/null
+++ b/model_00056-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72439ea1105e8a8d8bfe368ef3831bfaa67635adcfac83c5a4c6246820298d5d
+size 4932875550
diff --git a/model_00057-of-00072.safetensors b/model_00057-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4b5f951d644e5e7f749dbb3fe3a6d0c4e6cf91e5
--- /dev/null
+++ b/model_00057-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5867f52208204cb813baae08cb7661444f1a135cb0e0303d5cab0b7bbda0ce4c
+size 4932875550
diff --git a/model_00058-of-00072.safetensors b/model_00058-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2aa502206e331b6ad618f8b023b7677564ca7384
--- /dev/null
+++ b/model_00058-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4e7373f4af2267647b8c4effdf0404974cee003b0bbbadf652fed8b6f1e6663
+size 4932875550
diff --git a/model_00059-of-00072.safetensors b/model_00059-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a1ab12c75934fba8eea5e6de19d24f60f24bca5a
--- /dev/null
+++ b/model_00059-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d6961761a7a1dacf70f9cc8eb9e110caedeb5194d517103bfe8c18a879c768a
+size 4932875550
diff --git a/model_00060-of-00072.safetensors b/model_00060-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7636c23d53a69394c3a5c1698eb7b2924b599549
--- /dev/null
+++ b/model_00060-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:021ed9ec2b7d656f18d461d87f0362e9d87c1a610b8ad6c760a8dffc4ddb0017
+size 4932875550
diff --git a/model_00061-of-00072.safetensors b/model_00061-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2912ccc7f572660b084557834b6d0f537c6e3bc5
--- /dev/null
+++ b/model_00061-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28a4b0a41a66245694781aab857a9f081375ae851e00f50cd9dc6329fcbd9281
+size 4932875550
diff --git a/model_00062-of-00072.safetensors b/model_00062-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bc1960f6449b19d0c893b73f47b1a460735e1332
--- /dev/null
+++ b/model_00062-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4de0aab9e437e7333b049cbf05c858ae6b5b81aeb72f9e7e05f3a16052d5d124
+size 4932875550
diff --git a/model_00063-of-00072.safetensors b/model_00063-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5f51637f13b5a762defee01ced8cae8737b6dd97
--- /dev/null
+++ b/model_00063-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2816e51e09a1f6e9a118e16f85393fa056aad66723900439d6ab4bc4770ea020
+size 4932875550
diff --git a/model_00064-of-00072.safetensors b/model_00064-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ef6ca5d7a2a71e373a4668084fcd79a4bfe9dffa
--- /dev/null
+++ b/model_00064-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:102218101c1892281c16c65a0dea766832fcc3bc9889ef5d9175cc43dba6f011
+size 4932875550
diff --git a/model_00065-of-00072.safetensors b/model_00065-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..08f4973358594011d6b54c67e7067899f916bbf9
--- /dev/null
+++ b/model_00065-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de58ac852f1cab9af09abeda7981c9e3c6fbb62f9e059633871d31ba589fad27
+size 4932875550
diff --git a/model_00066-of-00072.safetensors b/model_00066-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f6f321345bd0acac0e4b63455070f271934063f1
--- /dev/null
+++ b/model_00066-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e136199b7e1746f1309f84d8a680d3956d7af28796f99e5014e88c23a886b2a
+size 4932875550
diff --git a/model_00067-of-00072.safetensors b/model_00067-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1f5ccc98020e958c83f0c2327185ced67a2bbe84
--- /dev/null
+++ b/model_00067-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6e001f71eba6f7e929691d8b5bda491a496c4e867ca9d7a6de92c05c2c8038d
+size 4932875550
diff --git a/model_00068-of-00072.safetensors b/model_00068-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8d18ae257cbc963f87fe7d09bc329faa31ef38a0
--- /dev/null
+++ b/model_00068-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:085df5d7de1240f48303ded13f656a7447f2638792b0e97c07a69862f309211e
+size 4932875550
diff --git a/model_00069-of-00072.safetensors b/model_00069-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5b150385e254a02e1719bede8a69a21b3e1f653c
--- /dev/null
+++ b/model_00069-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:136b14e8abc5e917b7cba631d9f53320c6b016eae12e7db302407e15550cb64f
+size 4932875550
diff --git a/model_00070-of-00072.safetensors b/model_00070-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..16f4342bf7ea76a349e5c877fdc246b6b0e3f3e9
--- /dev/null
+++ b/model_00070-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e4974c4a536b1889cad95c4b5f9a7aadad5f19361ea51acafab8c34c5861005
+size 4932875550
diff --git a/model_00071-of-00072.safetensors b/model_00071-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f7892ded6933ab06c309c2e49a095e429b398350
--- /dev/null
+++ b/model_00071-of-00072.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6759c9f72843cd2d7d34e2ae606c40c91f717e2705de0512df4ac2a02067a6e
+size 4932875550
diff --git a/model_00072-of-00072.safetensors b/model_00072-of-00072.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8b1f848bf353bfc00da69042f1b2b2ff37918908
Binary files /dev/null and b/model_00072-of-00072.safetensors differ