plip committed on
Commit
5c0ed8d
1 Parent(s): 0bc6c92

Training in progress, step 230000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23bbc0287788b2c03b401006764ab5c51beca76144630361bf7232a38c305bb2
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c73f6ebf98667b9d415618c592fcc151813c952d53aa83593aeea350ae20ad82
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:536b6de0b75a81a1046eda925f6ee0d8ba9e4b19c73ef4c848e57b42c2ce63b3
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f49e8c0a7fe5574e03000ae8a462d71703d3111453eca27095f6af063a3ac4e
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0be7ce3b05940caed2419a487b770c6b4c383452e171183ad5c457321fb063b3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e201bd9c4e3306c102fdf93d9794a7db518406ab08c5c5b2a75cbf98841961b7
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0be7ce3b05940caed2419a487b770c6b4c383452e171183ad5c457321fb063b3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e201bd9c4e3306c102fdf93d9794a7db518406ab08c5c5b2a75cbf98841961b7
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0be7ce3b05940caed2419a487b770c6b4c383452e171183ad5c457321fb063b3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e201bd9c4e3306c102fdf93d9794a7db518406ab08c5c5b2a75cbf98841961b7
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0be7ce3b05940caed2419a487b770c6b4c383452e171183ad5c457321fb063b3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e201bd9c4e3306c102fdf93d9794a7db518406ab08c5c5b2a75cbf98841961b7
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0be7ce3b05940caed2419a487b770c6b4c383452e171183ad5c457321fb063b3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e201bd9c4e3306c102fdf93d9794a7db518406ab08c5c5b2a75cbf98841961b7
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0be7ce3b05940caed2419a487b770c6b4c383452e171183ad5c457321fb063b3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e201bd9c4e3306c102fdf93d9794a7db518406ab08c5c5b2a75cbf98841961b7
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0be7ce3b05940caed2419a487b770c6b4c383452e171183ad5c457321fb063b3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e201bd9c4e3306c102fdf93d9794a7db518406ab08c5c5b2a75cbf98841961b7
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0be7ce3b05940caed2419a487b770c6b4c383452e171183ad5c457321fb063b3
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e201bd9c4e3306c102fdf93d9794a7db518406ab08c5c5b2a75cbf98841961b7
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49c292ab14a993919881f01d5b74688df2db2f4ea7c017c2175d0fea64e57565
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cde8387b01007811ac1b94d9590ed8f2f119f8e7b49d5ce42fb838c1939b856b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.480392156862745,
5
- "global_step": 220000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4406,11 +4406,211 @@
4406
  "eval_samples_per_second": 765.469,
4407
  "eval_steps_per_second": 12.247,
4408
  "step": 220000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4409
  }
4410
  ],
4411
  "max_steps": 250000,
4412
  "num_train_epochs": 16,
4413
- "total_flos": 3.5235659265006937e+21,
4414
  "trial_name": null,
4415
  "trial_params": null
4416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.093137254901961,
5
+ "global_step": 230000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4406
  "eval_samples_per_second": 765.469,
4407
  "eval_steps_per_second": 12.247,
4408
  "step": 220000
4409
+ },
4410
+ {
4411
+ "epoch": 13.51,
4412
+ "learning_rate": 3.2176382151888054e-05,
4413
+ "loss": 0.4414,
4414
+ "step": 220500
4415
+ },
4416
+ {
4417
+ "epoch": 13.54,
4418
+ "learning_rate": 3.1440176210975204e-05,
4419
+ "loss": 0.4412,
4420
+ "step": 221000
4421
+ },
4422
+ {
4423
+ "epoch": 13.54,
4424
+ "eval_loss": 0.7996479272842407,
4425
+ "eval_runtime": 1.3112,
4426
+ "eval_samples_per_second": 762.651,
4427
+ "eval_steps_per_second": 12.202,
4428
+ "step": 221000
4429
+ },
4430
+ {
4431
+ "epoch": 13.57,
4432
+ "learning_rate": 3.071593666296585e-05,
4433
+ "loss": 0.4411,
4434
+ "step": 221500
4435
+ },
4436
+ {
4437
+ "epoch": 13.6,
4438
+ "learning_rate": 3.000369518844396e-05,
4439
+ "loss": 0.4411,
4440
+ "step": 222000
4441
+ },
4442
+ {
4443
+ "epoch": 13.6,
4444
+ "eval_loss": 0.8002747297286987,
4445
+ "eval_runtime": 1.2963,
4446
+ "eval_samples_per_second": 771.412,
4447
+ "eval_steps_per_second": 12.343,
4448
+ "step": 222000
4449
+ },
4450
+ {
4451
+ "epoch": 13.63,
4452
+ "learning_rate": 2.9303482943159077e-05,
4453
+ "loss": 0.4411,
4454
+ "step": 222500
4455
+ },
4456
+ {
4457
+ "epoch": 13.66,
4458
+ "learning_rate": 2.861533055666306e-05,
4459
+ "loss": 0.4411,
4460
+ "step": 223000
4461
+ },
4462
+ {
4463
+ "epoch": 13.66,
4464
+ "eval_loss": 0.7992754578590393,
4465
+ "eval_runtime": 1.2911,
4466
+ "eval_samples_per_second": 774.504,
4467
+ "eval_steps_per_second": 12.392,
4468
+ "step": 223000
4469
+ },
4470
+ {
4471
+ "epoch": 13.69,
4472
+ "learning_rate": 2.793926813097066e-05,
4473
+ "loss": 0.4411,
4474
+ "step": 223500
4475
+ },
4476
+ {
4477
+ "epoch": 13.73,
4478
+ "learning_rate": 2.7275325239242546e-05,
4479
+ "loss": 0.4411,
4480
+ "step": 224000
4481
+ },
4482
+ {
4483
+ "epoch": 13.73,
4484
+ "eval_loss": 0.800546407699585,
4485
+ "eval_runtime": 6.2685,
4486
+ "eval_samples_per_second": 159.527,
4487
+ "eval_steps_per_second": 2.552,
4488
+ "step": 224000
4489
+ },
4490
+ {
4491
+ "epoch": 13.76,
4492
+ "learning_rate": 2.6623530924491626e-05,
4493
+ "loss": 0.4409,
4494
+ "step": 224500
4495
+ },
4496
+ {
4497
+ "epoch": 13.79,
4498
+ "learning_rate": 2.5983913698312782e-05,
4499
+ "loss": 0.4409,
4500
+ "step": 225000
4501
+ },
4502
+ {
4503
+ "epoch": 13.79,
4504
+ "eval_loss": 0.8013263940811157,
4505
+ "eval_runtime": 1.3128,
4506
+ "eval_samples_per_second": 761.734,
4507
+ "eval_steps_per_second": 12.188,
4508
+ "step": 225000
4509
+ },
4510
+ {
4511
+ "epoch": 13.82,
4512
+ "learning_rate": 2.5356501539635512e-05,
4513
+ "loss": 0.441,
4514
+ "step": 225500
4515
+ },
4516
+ {
4517
+ "epoch": 13.85,
4518
+ "learning_rate": 2.4741321893500244e-05,
4519
+ "loss": 0.4409,
4520
+ "step": 226000
4521
+ },
4522
+ {
4523
+ "epoch": 13.85,
4524
+ "eval_loss": 0.8015850782394409,
4525
+ "eval_runtime": 1.3378,
4526
+ "eval_samples_per_second": 747.504,
4527
+ "eval_steps_per_second": 11.96,
4528
+ "step": 226000
4529
+ },
4530
+ {
4531
+ "epoch": 13.88,
4532
+ "learning_rate": 2.4138401669857587e-05,
4533
+ "loss": 0.4408,
4534
+ "step": 226500
4535
+ },
4536
+ {
4537
+ "epoch": 13.91,
4538
+ "learning_rate": 2.3547767242391212e-05,
4539
+ "loss": 0.4409,
4540
+ "step": 227000
4541
+ },
4542
+ {
4543
+ "epoch": 13.91,
4544
+ "eval_loss": 0.7994450926780701,
4545
+ "eval_runtime": 1.2846,
4546
+ "eval_samples_per_second": 778.448,
4547
+ "eval_steps_per_second": 12.455,
4548
+ "step": 227000
4549
+ },
4550
+ {
4551
+ "epoch": 13.94,
4552
+ "learning_rate": 2.2969444447364498e-05,
4553
+ "loss": 0.4409,
4554
+ "step": 227500
4555
+ },
4556
+ {
4557
+ "epoch": 13.97,
4558
+ "learning_rate": 2.240345858248992e-05,
4559
+ "loss": 0.4408,
4560
+ "step": 228000
4561
+ },
4562
+ {
4563
+ "epoch": 13.97,
4564
+ "eval_loss": 0.8022862672805786,
4565
+ "eval_runtime": 1.3268,
4566
+ "eval_samples_per_second": 753.683,
4567
+ "eval_steps_per_second": 12.059,
4568
+ "step": 228000
4569
+ },
4570
+ {
4571
+ "epoch": 14.0,
4572
+ "learning_rate": 2.184983440582284e-05,
4573
+ "loss": 0.4408,
4574
+ "step": 228500
4575
+ },
4576
+ {
4577
+ "epoch": 14.03,
4578
+ "learning_rate": 2.1308596134678134e-05,
4579
+ "loss": 0.4407,
4580
+ "step": 229000
4581
+ },
4582
+ {
4583
+ "epoch": 14.03,
4584
+ "eval_loss": 0.8013246059417725,
4585
+ "eval_runtime": 1.2694,
4586
+ "eval_samples_per_second": 787.747,
4587
+ "eval_steps_per_second": 12.604,
4588
+ "step": 229000
4589
+ },
4590
+ {
4591
+ "epoch": 14.06,
4592
+ "learning_rate": 2.0779767444571236e-05,
4593
+ "loss": 0.4406,
4594
+ "step": 229500
4595
+ },
4596
+ {
4597
+ "epoch": 14.09,
4598
+ "learning_rate": 2.0263371468182175e-05,
4599
+ "loss": 0.4406,
4600
+ "step": 230000
4601
+ },
4602
+ {
4603
+ "epoch": 14.09,
4604
+ "eval_loss": 0.8037849068641663,
4605
+ "eval_runtime": 1.356,
4606
+ "eval_samples_per_second": 737.479,
4607
+ "eval_steps_per_second": 11.8,
4608
+ "step": 230000
4609
  }
4610
  ],
4611
  "max_steps": 250000,
4612
  "num_train_epochs": 16,
4613
+ "total_flos": 3.6837239188628827e+21,
4614
  "trial_name": null,
4615
  "trial_params": null
4616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:536b6de0b75a81a1046eda925f6ee0d8ba9e4b19c73ef4c848e57b42c2ce63b3
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f49e8c0a7fe5574e03000ae8a462d71703d3111453eca27095f6af063a3ac4e
3
  size 25761253