Nadav committed on
Commit
9d18631
·
1 Parent(s): 4fb48eb

Training in progress, step 100000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b573222d9b319a3d487c0c578817b5195c7686b855d96946d89e10146ac2e0d
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c92252b3cd9dd7d41fd1e3e6f9555d8d595a367dcb3a12f334d23aa6d7f4be3a
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:066b9aec11086e6d754392502c67c132273bd9e8c2079983d1d403167bb1695a
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9629991174b98bdc9c2004c23d8143dcb55e825714893b4fe878d212e3404d14
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4f99dc9e42edd3c1d095b5820fe5dc8c8ab1d01c4fa0832268eb95913430929
3
  size 15587
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:523db574847f79a66ca0c3ee491e7cb5f8ccda9a30ceceeeeb9a387933819100
3
  size 15587
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f42287d0188e0ca7518e7347c2cbcdfa5474b9d7f4514b250faba8c2dce24e04
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:487b0fc983d2aea61209ee5a057485c5c06df1d2510fa4762ccbc37b48f414e6
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:529d4dd93aa83bf4a63eacb44b8ee831aacefd449fd87e79fd39cb07ed7f8418
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b9d090099ba96f6dced98144320a4b9bf47460bb589afeb514879b05a2d09ba
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.270938533867318,
5
- "global_step": 95000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1298,11 +1298,79 @@
1298
  "eval_samples_per_second": 292.35,
1299
  "eval_steps_per_second": 4.619,
1300
  "step": 95000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1301
  }
1302
  ],
1303
  "max_steps": 100000,
1304
  "num_train_epochs": 9,
1305
- "total_flos": 4.474219316873905e+21,
1306
  "trial_name": null,
1307
  "trial_params": null
1308
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.706251088281386,
5
+ "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1298
  "eval_samples_per_second": 292.35,
1299
  "eval_steps_per_second": 4.619,
1300
  "step": 95000
1301
+ },
1302
+ {
1303
+ "epoch": 8.31,
1304
+ "learning_rate": 1.0460094397410629e-05,
1305
+ "loss": 0.4004,
1306
+ "step": 95500
1307
+ },
1308
+ {
1309
+ "epoch": 8.36,
1310
+ "learning_rate": 1.0364381490129294e-05,
1311
+ "loss": 0.4006,
1312
+ "step": 96000
1313
+ },
1314
+ {
1315
+ "epoch": 8.4,
1316
+ "learning_rate": 1.0279778134691878e-05,
1317
+ "loss": 0.4007,
1318
+ "step": 96500
1319
+ },
1320
+ {
1321
+ "epoch": 8.45,
1322
+ "learning_rate": 1.0206441030895691e-05,
1323
+ "loss": 0.4001,
1324
+ "step": 97000
1325
+ },
1326
+ {
1327
+ "epoch": 8.49,
1328
+ "learning_rate": 1.0144094343341912e-05,
1329
+ "loss": 0.4005,
1330
+ "step": 97500
1331
+ },
1332
+ {
1333
+ "epoch": 8.53,
1334
+ "learning_rate": 1.0092911560189286e-05,
1335
+ "loss": 0.3995,
1336
+ "step": 98000
1337
+ },
1338
+ {
1339
+ "epoch": 8.58,
1340
+ "learning_rate": 1.0052905310023697e-05,
1341
+ "loss": 0.401,
1342
+ "step": 98500
1343
+ },
1344
+ {
1345
+ "epoch": 8.62,
1346
+ "learning_rate": 1.0024085463788748e-05,
1347
+ "loss": 0.4002,
1348
+ "step": 99000
1349
+ },
1350
+ {
1351
+ "epoch": 8.66,
1352
+ "learning_rate": 1.0006483210534733e-05,
1353
+ "loss": 0.4,
1354
+ "step": 99500
1355
+ },
1356
+ {
1357
+ "epoch": 8.71,
1358
+ "learning_rate": 1.0000032344694547e-05,
1359
+ "loss": 0.3999,
1360
+ "step": 100000
1361
+ },
1362
+ {
1363
+ "epoch": 8.71,
1364
+ "eval_loss": 0.3816235363483429,
1365
+ "eval_runtime": 41.2453,
1366
+ "eval_samples_per_second": 121.226,
1367
+ "eval_steps_per_second": 1.915,
1368
+ "step": 100000
1369
  }
1370
  ],
1371
  "max_steps": 100000,
1372
  "num_train_epochs": 9,
1373
+ "total_flos": 4.7097123842386664e+21,
1374
  "trial_name": null,
1375
  "trial_params": null
1376
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:066b9aec11086e6d754392502c67c132273bd9e8c2079983d1d403167bb1695a
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9629991174b98bdc9c2004c23d8143dcb55e825714893b4fe878d212e3404d14
3
  size 449471589