Training in progress, step 95000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +71 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 893439185
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1b573222d9b319a3d487c0c578817b5195c7686b855d96946d89e10146ac2e0d
|
3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:066b9aec11086e6d754392502c67c132273bd9e8c2079983d1d403167bb1695a
|
3 |
size 449471589
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15587
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4f99dc9e42edd3c1d095b5820fe5dc8c8ab1d01c4fa0832268eb95913430929
|
3 |
size 15587
|
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 559
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f42287d0188e0ca7518e7347c2cbcdfa5474b9d7f4514b250faba8c2dce24e04
|
3 |
size 559
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:529d4dd93aa83bf4a63eacb44b8ee831aacefd449fd87e79fd39cb07ed7f8418
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1230,11 +1230,79 @@
|
|
1230 |
"eval_samples_per_second": 302.358,
|
1231 |
"eval_steps_per_second": 4.777,
|
1232 |
"step": 90000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1233 |
}
|
1234 |
],
|
1235 |
"max_steps": 100000,
|
1236 |
"num_train_epochs": 9,
|
1237 |
-
"total_flos": 4.
|
1238 |
"trial_name": null,
|
1239 |
"trial_params": null
|
1240 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 8.270938533867318,
|
5 |
+
"global_step": 95000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1230 |
"eval_samples_per_second": 302.358,
|
1231 |
"eval_steps_per_second": 4.777,
|
1232 |
"step": 90000
|
1233 |
+
},
|
1234 |
+
{
|
1235 |
+
"epoch": 7.88,
|
1236 |
+
"learning_rate": 1.2020863570515961e-05,
|
1237 |
+
"loss": 0.4009,
|
1238 |
+
"step": 90500
|
1239 |
+
},
|
1240 |
+
{
|
1241 |
+
"epoch": 7.92,
|
1242 |
+
"learning_rate": 1.1815845782614282e-05,
|
1243 |
+
"loss": 0.4028,
|
1244 |
+
"step": 91000
|
1245 |
+
},
|
1246 |
+
{
|
1247 |
+
"epoch": 7.97,
|
1248 |
+
"learning_rate": 1.162157941946108e-05,
|
1249 |
+
"loss": 0.4015,
|
1250 |
+
"step": 91500
|
1251 |
+
},
|
1252 |
+
{
|
1253 |
+
"epoch": 8.01,
|
1254 |
+
"learning_rate": 1.1438112413374588e-05,
|
1255 |
+
"loss": 0.4016,
|
1256 |
+
"step": 92000
|
1257 |
+
},
|
1258 |
+
{
|
1259 |
+
"epoch": 8.05,
|
1260 |
+
"learning_rate": 1.1265490032093274e-05,
|
1261 |
+
"loss": 0.4015,
|
1262 |
+
"step": 92500
|
1263 |
+
},
|
1264 |
+
{
|
1265 |
+
"epoch": 8.1,
|
1266 |
+
"learning_rate": 1.1103754867606732e-05,
|
1267 |
+
"loss": 0.4009,
|
1268 |
+
"step": 93000
|
1269 |
+
},
|
1270 |
+
{
|
1271 |
+
"epoch": 8.14,
|
1272 |
+
"learning_rate": 1.0953237511280449e-05,
|
1273 |
+
"loss": 0.4,
|
1274 |
+
"step": 93500
|
1275 |
+
},
|
1276 |
+
{
|
1277 |
+
"epoch": 8.18,
|
1278 |
+
"learning_rate": 1.0813640603098685e-05,
|
1279 |
+
"loss": 0.4012,
|
1280 |
+
"step": 94000
|
1281 |
+
},
|
1282 |
+
{
|
1283 |
+
"epoch": 8.23,
|
1284 |
+
"learning_rate": 1.0684751669305436e-05,
|
1285 |
+
"loss": 0.4014,
|
1286 |
+
"step": 94500
|
1287 |
+
},
|
1288 |
+
{
|
1289 |
+
"epoch": 8.27,
|
1290 |
+
"learning_rate": 1.0566893240808188e-05,
|
1291 |
+
"loss": 0.3998,
|
1292 |
+
"step": 95000
|
1293 |
+
},
|
1294 |
+
{
|
1295 |
+
"epoch": 8.27,
|
1296 |
+
"eval_loss": 0.38168150186538696,
|
1297 |
+
"eval_runtime": 17.1028,
|
1298 |
+
"eval_samples_per_second": 292.35,
|
1299 |
+
"eval_steps_per_second": 4.619,
|
1300 |
+
"step": 95000
|
1301 |
}
|
1302 |
],
|
1303 |
"max_steps": 100000,
|
1304 |
"num_train_epochs": 9,
|
1305 |
+
"total_flos": 4.474219316873905e+21,
|
1306 |
"trial_name": null,
|
1307 |
"trial_params": null
|
1308 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 449471589
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:066b9aec11086e6d754392502c67c132273bd9e8c2079983d1d403167bb1695a
|
3 |
size 449471589
|