Training in progress, step 8500, checkpoint
Browse files- last-checkpoint/global_step8500/mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step8500/zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step8500/zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step8500/zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step8500/zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step8500/zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step8500/zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/model.safetensors +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +2 -2
- last-checkpoint/rng_state_4.pth +2 -2
- last-checkpoint/rng_state_5.pth +2 -2
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +73 -3
last-checkpoint/global_step8500/mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b00bfac3db53b521d614ce880e37feda5c474c7fba505a80a6d87d89fa371d6
|
3 |
+
size 197282509
|
last-checkpoint/global_step8500/zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:91cd20ad8ecf66c964fee0c8463fbedecf792a1a57c7fdec1d0fad4e4e1d8b35
|
3 |
+
size 180416968
|
last-checkpoint/global_step8500/zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b04765786097e4990a2170914e6f9d7c27447c8ec041bca13a538f7ff9b0eecc
|
3 |
+
size 180416776
|
last-checkpoint/global_step8500/zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:549824e4bab052048978b41ec5aa592913f15740cf737e38321c0b486d0527a9
|
3 |
+
size 180416776
|
last-checkpoint/global_step8500/zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b82f439bd80358859433ccfec8b6f84a61360667568e37f9ff58752b32be9b5
|
3 |
+
size 180416904
|
last-checkpoint/global_step8500/zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b8e2d0d67801717d7ec7333d951d1911fdc8344f8fa1ce8a7c4fd95796ec354
|
3 |
+
size 180416712
|
last-checkpoint/global_step8500/zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a29df9d086627bd5788f2c85fc4c8e2f1f7f432ae7fe8b1242018731c04f2930
|
3 |
+
size 180417096
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step8500
|
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 188836816
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7d95a2a6398a3976684f9f296988a5e39dde3afb33e0d969993bcbd7dcacaca
|
3 |
size 188836816
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:564a5d159c6784b209d5ba2261c4de8c25ee8803b9c92f51fd1e9fd52c1b3fc4
|
3 |
size 15536
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4cbf33bcff4e9bc7aead0f12ef6877a895f2040493ae419349b5a3b110b5036e
|
3 |
size 15536
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15536
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ce0b2876ec405f01219dce5c2551d896b5f10a920f11726ba040105793eaabb
|
3 |
size 15536
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59a83285a5f0517959619b1f7f7827cf51e67685851a9aed6f10647402c51355
|
3 |
+
size 15472
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d45bda445d230c33252857d7ba14a68c6656232ceb63109a76eba6e8e0d43306
|
3 |
+
size 15472
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:976477c91777116536dc7d0dffbe92366832796b5f9e9ba3c59be4799747b19a
|
3 |
+
size 15536
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b71db0e061be5d6991df2e521349de0fd5a5ff99a4bfc1c920bc3894a6254c9f
|
3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1163,6 +1163,76 @@
|
|
1163 |
"learning_rate": 0.0001,
|
1164 |
"loss": 0.022,
|
1165 |
"step": 8000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1166 |
}
|
1167 |
],
|
1168 |
"logging_steps": 50,
|
@@ -1182,7 +1252,7 @@
|
|
1182 |
"attributes": {}
|
1183 |
}
|
1184 |
},
|
1185 |
-
"total_flos": 2.
|
1186 |
"train_batch_size": 64,
|
1187 |
"trial_name": null,
|
1188 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 229.72972972972974,
|
5 |
"eval_steps": 1500,
|
6 |
+
"global_step": 8500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1163 |
"learning_rate": 0.0001,
|
1164 |
"loss": 0.022,
|
1165 |
"step": 8000
|
1166 |
+
},
|
1167 |
+
{
|
1168 |
+
"epoch": 217.56756756756758,
|
1169 |
+
"grad_norm": 0.38513001799583435,
|
1170 |
+
"learning_rate": 0.0001,
|
1171 |
+
"loss": 0.0215,
|
1172 |
+
"step": 8050
|
1173 |
+
},
|
1174 |
+
{
|
1175 |
+
"epoch": 218.9189189189189,
|
1176 |
+
"grad_norm": 0.400036484003067,
|
1177 |
+
"learning_rate": 0.0001,
|
1178 |
+
"loss": 0.021,
|
1179 |
+
"step": 8100
|
1180 |
+
},
|
1181 |
+
{
|
1182 |
+
"epoch": 220.27027027027026,
|
1183 |
+
"grad_norm": 0.3203113377094269,
|
1184 |
+
"learning_rate": 0.0001,
|
1185 |
+
"loss": 0.0207,
|
1186 |
+
"step": 8150
|
1187 |
+
},
|
1188 |
+
{
|
1189 |
+
"epoch": 221.6216216216216,
|
1190 |
+
"grad_norm": 0.3765117824077606,
|
1191 |
+
"learning_rate": 0.0001,
|
1192 |
+
"loss": 0.0197,
|
1193 |
+
"step": 8200
|
1194 |
+
},
|
1195 |
+
{
|
1196 |
+
"epoch": 222.97297297297297,
|
1197 |
+
"grad_norm": 0.3336365222930908,
|
1198 |
+
"learning_rate": 0.0001,
|
1199 |
+
"loss": 0.0211,
|
1200 |
+
"step": 8250
|
1201 |
+
},
|
1202 |
+
{
|
1203 |
+
"epoch": 224.32432432432432,
|
1204 |
+
"grad_norm": 0.29828354716300964,
|
1205 |
+
"learning_rate": 0.0001,
|
1206 |
+
"loss": 0.0188,
|
1207 |
+
"step": 8300
|
1208 |
+
},
|
1209 |
+
{
|
1210 |
+
"epoch": 225.67567567567568,
|
1211 |
+
"grad_norm": 0.34553930163383484,
|
1212 |
+
"learning_rate": 0.0001,
|
1213 |
+
"loss": 0.0199,
|
1214 |
+
"step": 8350
|
1215 |
+
},
|
1216 |
+
{
|
1217 |
+
"epoch": 227.02702702702703,
|
1218 |
+
"grad_norm": 0.3510328531265259,
|
1219 |
+
"learning_rate": 0.0001,
|
1220 |
+
"loss": 0.0215,
|
1221 |
+
"step": 8400
|
1222 |
+
},
|
1223 |
+
{
|
1224 |
+
"epoch": 228.3783783783784,
|
1225 |
+
"grad_norm": 0.48810675740242004,
|
1226 |
+
"learning_rate": 0.0001,
|
1227 |
+
"loss": 0.0217,
|
1228 |
+
"step": 8450
|
1229 |
+
},
|
1230 |
+
{
|
1231 |
+
"epoch": 229.72972972972974,
|
1232 |
+
"grad_norm": 0.34023284912109375,
|
1233 |
+
"learning_rate": 0.0001,
|
1234 |
+
"loss": 0.0225,
|
1235 |
+
"step": 8500
|
1236 |
}
|
1237 |
],
|
1238 |
"logging_steps": 50,
|
|
|
1252 |
"attributes": {}
|
1253 |
}
|
1254 |
},
|
1255 |
+
"total_flos": 2.3861586914904637e+20,
|
1256 |
"train_batch_size": 64,
|
1257 |
"trial_name": null,
|
1258 |
"trial_params": null
|