plip commited on
Commit
aad1369
·
1 Parent(s): 2166f5c

Training in progress, step 180000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de4f35402a55e29f80fb8c753ad6523a199f5dffc7c4e25641de3446e7dcc1e2
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d649779ba95c621015cb53780f5701aa302c759b040cf454f46acc31f4706b5
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c916340d7d75d5bc18c1ad33b84c353fe26c18b731b4892c4ff510dc90928bc5
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c619af36af772fff3b27134f866199a2501f2804d36e9fef52fa198a4bf9feb5
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f357ba8dde7396ae8bd66292dabc034f58c2eaafd10d7b87a424d8b6a4c63676
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f357ba8dde7396ae8bd66292dabc034f58c2eaafd10d7b87a424d8b6a4c63676
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f357ba8dde7396ae8bd66292dabc034f58c2eaafd10d7b87a424d8b6a4c63676
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f357ba8dde7396ae8bd66292dabc034f58c2eaafd10d7b87a424d8b6a4c63676
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f357ba8dde7396ae8bd66292dabc034f58c2eaafd10d7b87a424d8b6a4c63676
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f357ba8dde7396ae8bd66292dabc034f58c2eaafd10d7b87a424d8b6a4c63676
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f357ba8dde7396ae8bd66292dabc034f58c2eaafd10d7b87a424d8b6a4c63676
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f357ba8dde7396ae8bd66292dabc034f58c2eaafd10d7b87a424d8b6a4c63676
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f8edee7121f04285c72b6c9f7844a954a235fd8dbdb096b1e8ce5764e29663e
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1470898ecdc6550560113c5e2cfd1e79edea6b27c0b7d35814645546c1b5bff0
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae7e48b658f6388c6c044e6d37239970a21307494d626979f7e10630dfa93207
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.416666666666666,
5
- "global_step": 170000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3406,11 +3406,211 @@
3406
  "eval_samples_per_second": 733.005,
3407
  "eval_steps_per_second": 11.728,
3408
  "step": 170000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3409
  }
3410
  ],
3411
  "max_steps": 250000,
3412
  "num_train_epochs": 16,
3413
- "total_flos": 2.7227559436825185e+21,
3414
  "trial_name": null,
3415
  "trial_params": null
3416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.029411764705882,
5
+ "global_step": 180000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3406
  "eval_samples_per_second": 733.005,
3407
  "eval_steps_per_second": 11.728,
3408
  "step": 170000
3409
+ },
3410
+ {
3411
+ "epoch": 10.45,
3412
+ "learning_rate": 0.00015862789502737648,
3413
+ "loss": 0.4491,
3414
+ "step": 170500
3415
+ },
3416
+ {
3417
+ "epoch": 10.48,
3418
+ "learning_rate": 0.00015693712647480446,
3419
+ "loss": 0.4494,
3420
+ "step": 171000
3421
+ },
3422
+ {
3423
+ "epoch": 10.48,
3424
+ "eval_loss": 0.7991927266120911,
3425
+ "eval_runtime": 1.3282,
3426
+ "eval_samples_per_second": 752.902,
3427
+ "eval_steps_per_second": 12.046,
3428
+ "step": 171000
3429
+ },
3430
+ {
3431
+ "epoch": 10.51,
3432
+ "learning_rate": 0.00015525283467197743,
3433
+ "loss": 0.4487,
3434
+ "step": 171500
3435
+ },
3436
+ {
3437
+ "epoch": 10.54,
3438
+ "learning_rate": 0.00015357509329527556,
3439
+ "loss": 0.4486,
3440
+ "step": 172000
3441
+ },
3442
+ {
3443
+ "epoch": 10.54,
3444
+ "eval_loss": 0.8018996715545654,
3445
+ "eval_runtime": 1.3294,
3446
+ "eval_samples_per_second": 752.227,
3447
+ "eval_steps_per_second": 12.036,
3448
+ "step": 172000
3449
+ },
3450
+ {
3451
+ "epoch": 10.57,
3452
+ "learning_rate": 0.00015190397573454158,
3453
+ "loss": 0.4488,
3454
+ "step": 172500
3455
+ },
3456
+ {
3457
+ "epoch": 10.6,
3458
+ "learning_rate": 0.00015023955508987127,
3459
+ "loss": 0.4485,
3460
+ "step": 173000
3461
+ },
3462
+ {
3463
+ "epoch": 10.6,
3464
+ "eval_loss": 0.8025578260421753,
3465
+ "eval_runtime": 1.3279,
3466
+ "eval_samples_per_second": 753.043,
3467
+ "eval_steps_per_second": 12.049,
3468
+ "step": 173000
3469
+ },
3470
+ {
3471
+ "epoch": 10.63,
3472
+ "learning_rate": 0.00014858190416841565,
3473
+ "loss": 0.4483,
3474
+ "step": 173500
3475
+ },
3476
+ {
3477
+ "epoch": 10.66,
3478
+ "learning_rate": 0.00014693109548119591,
3479
+ "loss": 0.4483,
3480
+ "step": 174000
3481
+ },
3482
+ {
3483
+ "epoch": 10.66,
3484
+ "eval_loss": 0.8008602261543274,
3485
+ "eval_runtime": 1.2568,
3486
+ "eval_samples_per_second": 795.696,
3487
+ "eval_steps_per_second": 12.731,
3488
+ "step": 174000
3489
+ },
3490
+ {
3491
+ "epoch": 10.69,
3492
+ "learning_rate": 0.00014528720123993226,
3493
+ "loss": 0.448,
3494
+ "step": 174500
3495
+ },
3496
+ {
3497
+ "epoch": 10.72,
3498
+ "learning_rate": 0.0001436502933538841,
3499
+ "loss": 0.448,
3500
+ "step": 175000
3501
+ },
3502
+ {
3503
+ "epoch": 10.72,
3504
+ "eval_loss": 0.8021511435508728,
3505
+ "eval_runtime": 1.2829,
3506
+ "eval_samples_per_second": 779.473,
3507
+ "eval_steps_per_second": 12.472,
3508
+ "step": 175000
3509
+ },
3510
+ {
3511
+ "epoch": 10.75,
3512
+ "learning_rate": 0.00014202044342670508,
3513
+ "loss": 0.448,
3514
+ "step": 175500
3515
+ },
3516
+ {
3517
+ "epoch": 10.78,
3518
+ "learning_rate": 0.00014039772275331125,
3519
+ "loss": 0.4479,
3520
+ "step": 176000
3521
+ },
3522
+ {
3523
+ "epoch": 10.78,
3524
+ "eval_loss": 0.8016372323036194,
3525
+ "eval_runtime": 1.3768,
3526
+ "eval_samples_per_second": 726.333,
3527
+ "eval_steps_per_second": 11.621,
3528
+ "step": 176000
3529
+ },
3530
+ {
3531
+ "epoch": 10.81,
3532
+ "learning_rate": 0.00013878220231676152,
3533
+ "loss": 0.4475,
3534
+ "step": 176500
3535
+ },
3536
+ {
3537
+ "epoch": 10.85,
3538
+ "learning_rate": 0.00013717395278515355,
3539
+ "loss": 0.4476,
3540
+ "step": 177000
3541
+ },
3542
+ {
3543
+ "epoch": 10.85,
3544
+ "eval_loss": 0.7988106608390808,
3545
+ "eval_runtime": 1.2962,
3546
+ "eval_samples_per_second": 771.516,
3547
+ "eval_steps_per_second": 12.344,
3548
+ "step": 177000
3549
+ },
3550
+ {
3551
+ "epoch": 10.88,
3552
+ "learning_rate": 0.00013557304450853162,
3553
+ "loss": 0.4472,
3554
+ "step": 177500
3555
+ },
3556
+ {
3557
+ "epoch": 10.91,
3558
+ "learning_rate": 0.00013397954751581014,
3559
+ "loss": 0.4474,
3560
+ "step": 178000
3561
+ },
3562
+ {
3563
+ "epoch": 10.91,
3564
+ "eval_loss": 0.8025058507919312,
3565
+ "eval_runtime": 1.3447,
3566
+ "eval_samples_per_second": 743.664,
3567
+ "eval_steps_per_second": 11.899,
3568
+ "step": 178000
3569
+ },
3570
+ {
3571
+ "epoch": 10.94,
3572
+ "learning_rate": 0.00013239353151170983,
3573
+ "loss": 0.4471,
3574
+ "step": 178500
3575
+ },
3576
+ {
3577
+ "epoch": 10.97,
3578
+ "learning_rate": 0.00013081506587370853,
3579
+ "loss": 0.4471,
3580
+ "step": 179000
3581
+ },
3582
+ {
3583
+ "epoch": 10.97,
3584
+ "eval_loss": 0.8035358190536499,
3585
+ "eval_runtime": 1.362,
3586
+ "eval_samples_per_second": 734.202,
3587
+ "eval_steps_per_second": 11.747,
3588
+ "step": 179000
3589
+ },
3590
+ {
3591
+ "epoch": 11.0,
3592
+ "learning_rate": 0.00012924421964900695,
3593
+ "loss": 0.447,
3594
+ "step": 179500
3595
+ },
3596
+ {
3597
+ "epoch": 11.03,
3598
+ "learning_rate": 0.00012768106155150758,
3599
+ "loss": 0.4471,
3600
+ "step": 180000
3601
+ },
3602
+ {
3603
+ "epoch": 11.03,
3604
+ "eval_loss": 0.7982646226882935,
3605
+ "eval_runtime": 1.3411,
3606
+ "eval_samples_per_second": 745.664,
3607
+ "eval_steps_per_second": 11.931,
3608
+ "step": 180000
3609
  }
3610
  ],
3611
  "max_steps": 250000,
3612
  "num_train_epochs": 16,
3613
+ "total_flos": 2.8829139360447075e+21,
3614
  "trial_name": null,
3615
  "trial_params": null
3616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c916340d7d75d5bc18c1ad33b84c353fe26c18b731b4892c4ff510dc90928bc5
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c619af36af772fff3b27134f866199a2501f2804d36e9fef52fa198a4bf9feb5
3
  size 25761253