Nadav commited on
Commit
4fb48eb
·
1 Parent(s): 76e772e

Training in progress, step 95000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b0288be0c1235e777c27dd9047a36eedfe264bb488a4c0e0bed0c34a7672a27
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b573222d9b319a3d487c0c578817b5195c7686b855d96946d89e10146ac2e0d
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7eecde17cb6e9aa8cd7b4552f91801dbea59bd9f43564420f17d5094be8ecd0
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:066b9aec11086e6d754392502c67c132273bd9e8c2079983d1d403167bb1695a
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a05d03c073f727785714559ddd963d68941172e2660ea09c2fa916687be15d30
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4f99dc9e42edd3c1d095b5820fe5dc8c8ab1d01c4fa0832268eb95913430929
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a253357a0f0cc613ebf9506b97610f90339ebe2be5f931702becbaf171e782d
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f42287d0188e0ca7518e7347c2cbcdfa5474b9d7f4514b250faba8c2dce24e04
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c584c5bb4a0182ed3950a23caf01b869cbb053591dee4ffba4c62961e6b03ec
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:529d4dd93aa83bf4a63eacb44b8ee831aacefd449fd87e79fd39cb07ed7f8418
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.835625979453248,
5
- "global_step": 90000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1230,11 +1230,79 @@
1230
  "eval_samples_per_second": 302.358,
1231
  "eval_steps_per_second": 4.777,
1232
  "step": 90000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1233
  }
1234
  ],
1235
  "max_steps": 100000,
1236
  "num_train_epochs": 9,
1237
- "total_flos": 4.238744869891214e+21,
1238
  "trial_name": null,
1239
  "trial_params": null
1240
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.270938533867318,
5
+ "global_step": 95000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1230
  "eval_samples_per_second": 302.358,
1231
  "eval_steps_per_second": 4.777,
1232
  "step": 90000
1233
+ },
1234
+ {
1235
+ "epoch": 7.88,
1236
+ "learning_rate": 1.2020863570515961e-05,
1237
+ "loss": 0.4009,
1238
+ "step": 90500
1239
+ },
1240
+ {
1241
+ "epoch": 7.92,
1242
+ "learning_rate": 1.1815845782614282e-05,
1243
+ "loss": 0.4028,
1244
+ "step": 91000
1245
+ },
1246
+ {
1247
+ "epoch": 7.97,
1248
+ "learning_rate": 1.162157941946108e-05,
1249
+ "loss": 0.4015,
1250
+ "step": 91500
1251
+ },
1252
+ {
1253
+ "epoch": 8.01,
1254
+ "learning_rate": 1.1438112413374588e-05,
1255
+ "loss": 0.4016,
1256
+ "step": 92000
1257
+ },
1258
+ {
1259
+ "epoch": 8.05,
1260
+ "learning_rate": 1.1265490032093274e-05,
1261
+ "loss": 0.4015,
1262
+ "step": 92500
1263
+ },
1264
+ {
1265
+ "epoch": 8.1,
1266
+ "learning_rate": 1.1103754867606732e-05,
1267
+ "loss": 0.4009,
1268
+ "step": 93000
1269
+ },
1270
+ {
1271
+ "epoch": 8.14,
1272
+ "learning_rate": 1.0953237511280449e-05,
1273
+ "loss": 0.4,
1274
+ "step": 93500
1275
+ },
1276
+ {
1277
+ "epoch": 8.18,
1278
+ "learning_rate": 1.0813640603098685e-05,
1279
+ "loss": 0.4012,
1280
+ "step": 94000
1281
+ },
1282
+ {
1283
+ "epoch": 8.23,
1284
+ "learning_rate": 1.0684751669305436e-05,
1285
+ "loss": 0.4014,
1286
+ "step": 94500
1287
+ },
1288
+ {
1289
+ "epoch": 8.27,
1290
+ "learning_rate": 1.0566893240808188e-05,
1291
+ "loss": 0.3998,
1292
+ "step": 95000
1293
+ },
1294
+ {
1295
+ "epoch": 8.27,
1296
+ "eval_loss": 0.38168150186538696,
1297
+ "eval_runtime": 17.1028,
1298
+ "eval_samples_per_second": 292.35,
1299
+ "eval_steps_per_second": 4.619,
1300
+ "step": 95000
1301
  }
1302
  ],
1303
  "max_steps": 100000,
1304
  "num_train_epochs": 9,
1305
+ "total_flos": 4.474219316873905e+21,
1306
  "trial_name": null,
1307
  "trial_params": null
1308
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7eecde17cb6e9aa8cd7b4552f91801dbea59bd9f43564420f17d5094be8ecd0
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:066b9aec11086e6d754392502c67c132273bd9e8c2079983d1d403167bb1695a
3
  size 449471589