plip commited on
Commit
92dbca7
·
1 Parent(s): d5ca745

Training in progress, step 70000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:437bdcfa884f243eb057db287fb27ac37ef2a0326b1d025d8ffcaaa891bad6ea
3
- size 50044241
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c11a259e2fcb4db9bb25889155e0b96cd678f633b923947bdf13de2be4bf3ae6
3
+ size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d066230793a00e463dada238cbf68fe9a6e4506761309bbc7272fd63bd501d9
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ca06a94bd9a048e9a1048edd363d5f15c12bf594452e56a2cb99b829fdbf580
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b53ff89d37c2976f0dbf329b38e7e3851775fcd8b4f21d8e3166437bbe7a48e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43dbf8e062e9d4eda243137e0b414d7704f82717578f44d0ec7e823055c550c1
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b53ff89d37c2976f0dbf329b38e7e3851775fcd8b4f21d8e3166437bbe7a48e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43dbf8e062e9d4eda243137e0b414d7704f82717578f44d0ec7e823055c550c1
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b53ff89d37c2976f0dbf329b38e7e3851775fcd8b4f21d8e3166437bbe7a48e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43dbf8e062e9d4eda243137e0b414d7704f82717578f44d0ec7e823055c550c1
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b53ff89d37c2976f0dbf329b38e7e3851775fcd8b4f21d8e3166437bbe7a48e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43dbf8e062e9d4eda243137e0b414d7704f82717578f44d0ec7e823055c550c1
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b53ff89d37c2976f0dbf329b38e7e3851775fcd8b4f21d8e3166437bbe7a48e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43dbf8e062e9d4eda243137e0b414d7704f82717578f44d0ec7e823055c550c1
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b53ff89d37c2976f0dbf329b38e7e3851775fcd8b4f21d8e3166437bbe7a48e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43dbf8e062e9d4eda243137e0b414d7704f82717578f44d0ec7e823055c550c1
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b53ff89d37c2976f0dbf329b38e7e3851775fcd8b4f21d8e3166437bbe7a48e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43dbf8e062e9d4eda243137e0b414d7704f82717578f44d0ec7e823055c550c1
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b53ff89d37c2976f0dbf329b38e7e3851775fcd8b4f21d8e3166437bbe7a48e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43dbf8e062e9d4eda243137e0b414d7704f82717578f44d0ec7e823055c550c1
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a14cae2e3d67695f0ffa4604dbaaa3f7147d70607921bb89c9afa09d57c87a5
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d89de13f8c844c8306762a31aff6dac5ffaadd95c6501762d83dc7939ed9eace
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.6764705882352944,
5
- "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1206,11 +1206,211 @@
1206
  "eval_samples_per_second": 755.286,
1207
  "eval_steps_per_second": 12.085,
1208
  "step": 60000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1209
  }
1210
  ],
1211
  "max_steps": 250000,
1212
  "num_train_epochs": 16,
1213
- "total_flos": 9.609779856839792e+20,
1214
  "trial_name": null,
1215
  "trial_params": null
1216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.28921568627451,
5
+ "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1206
  "eval_samples_per_second": 755.286,
1207
  "eval_steps_per_second": 12.085,
1208
  "step": 60000
1209
+ },
1210
+ {
1211
+ "epoch": 3.71,
1212
+ "learning_rate": 0.0005425079773152721,
1213
+ "loss": 0.5113,
1214
+ "step": 60500
1215
+ },
1216
+ {
1217
+ "epoch": 3.74,
1218
+ "learning_rate": 0.0005413455519215879,
1219
+ "loss": 0.5105,
1220
+ "step": 61000
1221
+ },
1222
+ {
1223
+ "epoch": 3.74,
1224
+ "eval_loss": 0.8522316813468933,
1225
+ "eval_runtime": 1.2934,
1226
+ "eval_samples_per_second": 773.185,
1227
+ "eval_steps_per_second": 12.371,
1228
+ "step": 61000
1229
+ },
1230
+ {
1231
+ "epoch": 3.77,
1232
+ "learning_rate": 0.0005401727880078093,
1233
+ "loss": 0.5102,
1234
+ "step": 61500
1235
+ },
1236
+ {
1237
+ "epoch": 3.8,
1238
+ "learning_rate": 0.0005389897368744289,
1239
+ "loss": 0.5099,
1240
+ "step": 62000
1241
+ },
1242
+ {
1243
+ "epoch": 3.8,
1244
+ "eval_loss": 0.8549780249595642,
1245
+ "eval_runtime": 1.3356,
1246
+ "eval_samples_per_second": 748.732,
1247
+ "eval_steps_per_second": 11.98,
1248
+ "step": 62000
1249
+ },
1250
+ {
1251
+ "epoch": 3.83,
1252
+ "learning_rate": 0.0005377964502719361,
1253
+ "loss": 0.5099,
1254
+ "step": 62500
1255
+ },
1256
+ {
1257
+ "epoch": 3.86,
1258
+ "learning_rate": 0.0005365929803985524,
1259
+ "loss": 0.5094,
1260
+ "step": 63000
1261
+ },
1262
+ {
1263
+ "epoch": 3.86,
1264
+ "eval_loss": 0.8536927700042725,
1265
+ "eval_runtime": 1.2799,
1266
+ "eval_samples_per_second": 781.291,
1267
+ "eval_steps_per_second": 12.501,
1268
+ "step": 63000
1269
+ },
1270
+ {
1271
+ "epoch": 3.89,
1272
+ "learning_rate": 0.0005353793798979489,
1273
+ "loss": 0.5093,
1274
+ "step": 63500
1275
+ },
1276
+ {
1277
+ "epoch": 3.92,
1278
+ "learning_rate": 0.000534155701856943,
1279
+ "loss": 0.509,
1280
+ "step": 64000
1281
+ },
1282
+ {
1283
+ "epoch": 3.92,
1284
+ "eval_loss": 0.8535122871398926,
1285
+ "eval_runtime": 1.2782,
1286
+ "eval_samples_per_second": 782.329,
1287
+ "eval_steps_per_second": 12.517,
1288
+ "step": 64000
1289
+ },
1290
+ {
1291
+ "epoch": 3.95,
1292
+ "learning_rate": 0.0005329219998031763,
1293
+ "loss": 0.5087,
1294
+ "step": 64500
1295
+ },
1296
+ {
1297
+ "epoch": 3.98,
1298
+ "learning_rate": 0.0005316783277027734,
1299
+ "loss": 0.5091,
1300
+ "step": 65000
1301
+ },
1302
+ {
1303
+ "epoch": 3.98,
1304
+ "eval_loss": 0.8591586351394653,
1305
+ "eval_runtime": 1.3143,
1306
+ "eval_samples_per_second": 760.882,
1307
+ "eval_steps_per_second": 12.174,
1308
+ "step": 65000
1309
+ },
1310
+ {
1311
+ "epoch": 4.01,
1312
+ "learning_rate": 0.0005304247399579808,
1313
+ "loss": 0.508,
1314
+ "step": 65500
1315
+ },
1316
+ {
1317
+ "epoch": 4.04,
1318
+ "learning_rate": 0.0005291612914047876,
1319
+ "loss": 0.5079,
1320
+ "step": 66000
1321
+ },
1322
+ {
1323
+ "epoch": 4.04,
1324
+ "eval_loss": 0.8554427027702332,
1325
+ "eval_runtime": 1.3483,
1326
+ "eval_samples_per_second": 741.678,
1327
+ "eval_steps_per_second": 11.867,
1328
+ "step": 66000
1329
+ },
1330
+ {
1331
+ "epoch": 4.07,
1332
+ "learning_rate": 0.0005278880373105263,
1333
+ "loss": 0.5077,
1334
+ "step": 66500
1335
+ },
1336
+ {
1337
+ "epoch": 4.11,
1338
+ "learning_rate": 0.0005266050333714561,
1339
+ "loss": 0.5074,
1340
+ "step": 67000
1341
+ },
1342
+ {
1343
+ "epoch": 4.11,
1344
+ "eval_loss": 0.8515585660934448,
1345
+ "eval_runtime": 1.2858,
1346
+ "eval_samples_per_second": 777.712,
1347
+ "eval_steps_per_second": 12.443,
1348
+ "step": 67000
1349
+ },
1350
+ {
1351
+ "epoch": 4.14,
1352
+ "learning_rate": 0.0005253123357103253,
1353
+ "loss": 0.5074,
1354
+ "step": 67500
1355
+ },
1356
+ {
1357
+ "epoch": 4.17,
1358
+ "learning_rate": 0.0005240100008739177,
1359
+ "loss": 0.5069,
1360
+ "step": 68000
1361
+ },
1362
+ {
1363
+ "epoch": 4.17,
1364
+ "eval_loss": 0.8490995168685913,
1365
+ "eval_runtime": 1.2974,
1366
+ "eval_samples_per_second": 770.751,
1367
+ "eval_steps_per_second": 12.332,
1368
+ "step": 68000
1369
+ },
1370
+ {
1371
+ "epoch": 4.2,
1372
+ "learning_rate": 0.0005226980858305778,
1373
+ "loss": 0.5067,
1374
+ "step": 68500
1375
+ },
1376
+ {
1377
+ "epoch": 4.23,
1378
+ "learning_rate": 0.0005213766479677197,
1379
+ "loss": 0.5066,
1380
+ "step": 69000
1381
+ },
1382
+ {
1383
+ "epoch": 4.23,
1384
+ "eval_loss": 0.8570588231086731,
1385
+ "eval_runtime": 1.2957,
1386
+ "eval_samples_per_second": 771.773,
1387
+ "eval_steps_per_second": 12.348,
1388
+ "step": 69000
1389
+ },
1390
+ {
1391
+ "epoch": 4.26,
1392
+ "learning_rate": 0.0005200457450893163,
1393
+ "loss": 0.5063,
1394
+ "step": 69500
1395
+ },
1396
+ {
1397
+ "epoch": 4.29,
1398
+ "learning_rate": 0.0005187054354133712,
1399
+ "loss": 0.5068,
1400
+ "step": 70000
1401
+ },
1402
+ {
1403
+ "epoch": 4.29,
1404
+ "eval_loss": 0.8535866141319275,
1405
+ "eval_runtime": 1.3217,
1406
+ "eval_samples_per_second": 756.603,
1407
+ "eval_steps_per_second": 12.106,
1408
+ "step": 70000
1409
  }
1410
  ],
1411
  "max_steps": 250000,
1412
  "num_train_epochs": 16,
1413
+ "total_flos": 1.1211359780461682e+21,
1414
  "trial_name": null,
1415
  "trial_params": null
1416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d066230793a00e463dada238cbf68fe9a6e4506761309bbc7272fd63bd501d9
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ca06a94bd9a048e9a1048edd363d5f15c12bf594452e56a2cb99b829fdbf580
3
  size 25761253