joelniklaus committed
Commit 0e6e586
1 Parent(s): 056ad06

Training in progress, step 800000

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:83668b91421a0e0a488c25487bfaad007dc55c18cdf463900a44a3ade5b8d40e
+ oid sha256:0736aff937bc5cbe089a02ee96acafe29a3ce8b5ef41405f8f101efedc8d1867
  size 1475917081
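
The checkpoint binaries in this commit are tracked with Git LFS, so the diff only rewrites their three-line pointer files (version, oid, size); the multi-gigabyte payloads live in LFS storage. As a hedged illustration, not part of this repository, here is a minimal Python sketch that parses such a pointer file into its fields, assuming exactly the key-value layout shown in the hunk above (the file on disk is only a pointer if the clone skipped the LFS smudge step, e.g. with GIT_LFS_SKIP_SMUDGE=1):

# Minimal sketch, not part of this repo: parse a Git LFS pointer file
# into its "version", "oid", and "size" fields. The path is hypothetical.
from pathlib import Path

def read_lfs_pointer(path: str) -> dict:
    fields = {}
    for line in Path(path).read_text().splitlines():
        if line.strip():
            key, _, value = line.partition(" ")
            fields[key] = value
    fields["size"] = int(fields["size"])  # e.g. 1475917081 bytes for optimizer.pt
    return fields

print(read_lfs_pointer("last-checkpoint/optimizer.pt"))  # prints the pointer fields, not the 1.5 GB blob
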
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b421ceb551fd425f650a6742be22de59c7f6b297d62820870b0681f67c5a91c4
+ oid sha256:a83020031862aea2e988f120ee4fbfdb9e36868ffeeee41e4196243bad5ad70f
  size 737971755
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b0e59fd1cd4b1fd2a1afae2fc772a87dc59b796c76d4e1240f03b46611b2bcc9
+ oid sha256:ea5f4d07228ced52baeea808cd24096aeb8c1e411df6d964bc2778cd1f37bff3
  size 13611
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b0e59fd1cd4b1fd2a1afae2fc772a87dc59b796c76d4e1240f03b46611b2bcc9
+ oid sha256:ea5f4d07228ced52baeea808cd24096aeb8c1e411df6d964bc2778cd1f37bff3
  size 13611
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b0e59fd1cd4b1fd2a1afae2fc772a87dc59b796c76d4e1240f03b46611b2bcc9
+ oid sha256:ea5f4d07228ced52baeea808cd24096aeb8c1e411df6d964bc2778cd1f37bff3
  size 13611
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b0e59fd1cd4b1fd2a1afae2fc772a87dc59b796c76d4e1240f03b46611b2bcc9
+ oid sha256:ea5f4d07228ced52baeea808cd24096aeb8c1e411df6d964bc2778cd1f37bff3
  size 13611
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b0e59fd1cd4b1fd2a1afae2fc772a87dc59b796c76d4e1240f03b46611b2bcc9
+ oid sha256:ea5f4d07228ced52baeea808cd24096aeb8c1e411df6d964bc2778cd1f37bff3
  size 13611
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b0e59fd1cd4b1fd2a1afae2fc772a87dc59b796c76d4e1240f03b46611b2bcc9
+ oid sha256:ea5f4d07228ced52baeea808cd24096aeb8c1e411df6d964bc2778cd1f37bff3
  size 13611
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b0e59fd1cd4b1fd2a1afae2fc772a87dc59b796c76d4e1240f03b46611b2bcc9
+ oid sha256:ea5f4d07228ced52baeea808cd24096aeb8c1e411df6d964bc2778cd1f37bff3
  size 13611
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b0e59fd1cd4b1fd2a1afae2fc772a87dc59b796c76d4e1240f03b46611b2bcc9
+ oid sha256:ea5f4d07228ced52baeea808cd24096aeb8c1e411df6d964bc2778cd1f37bff3
  size 13611
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:411052d7656a2fbf4baa154bd61bcb86c1d4e17113e6919b82f37e9aff99019f
+ oid sha256:50e51b9224ded3ddffee57f26ec45414409de0232579ddafb7f3e083076fa4c5
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.75,
- "global_step": 750000,
+ "epoch": 0.8,
+ "global_step": 800000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -4626,11 +4626,319 @@
  "eval_samples_per_second": 287.549,
  "eval_steps_per_second": 2.3,
  "step": 750000
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 1.6014457078461353e-05,
+ "loss": 0.7701,
+ "step": 751000
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 1.5893364398662176e-05,
+ "loss": 0.7372,
+ "step": 752000
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 1.5772644703565565e-05,
+ "loss": 0.6723,
+ "step": 753000
+ },
+ {
+ "epoch": 0.75,
+ "learning_rate": 1.5652299313342773e-05,
+ "loss": 0.7164,
+ "step": 754000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.553232954407171e-05,
+ "loss": 0.6914,
+ "step": 755000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.5412736707722537e-05,
+ "loss": 0.6834,
+ "step": 756000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.5293522112143373e-05,
+ "loss": 0.695,
+ "step": 757000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.517468706104589e-05,
+ "loss": 0.7152,
+ "step": 758000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.5056232853991209e-05,
+ "loss": 0.7522,
+ "step": 759000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.4938160786375572e-05,
+ "loss": 0.7519,
+ "step": 760000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.4820472149416154e-05,
+ "loss": 0.7807,
+ "step": 761000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.470316823013707e-05,
+ "loss": 0.8045,
+ "step": 762000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.4586250311355132e-05,
+ "loss": 0.8274,
+ "step": 763000
+ },
+ {
+ "epoch": 0.76,
+ "learning_rate": 1.4469719671666043e-05,
+ "loss": 0.8296,
+ "step": 764000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.435357758543015e-05,
+ "loss": 0.8388,
+ "step": 765000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.4237825322758736e-05,
+ "loss": 0.7847,
+ "step": 766000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.412246414949997e-05,
+ "loss": 0.8024,
+ "step": 767000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.4007495327225162e-05,
+ "loss": 0.7176,
+ "step": 768000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.389292011321498e-05,
+ "loss": 0.7148,
+ "step": 769000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.3778739760445552e-05,
+ "loss": 0.707,
+ "step": 770000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.3664955517574968e-05,
+ "loss": 0.7007,
+ "step": 771000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.3551568628929434e-05,
+ "loss": 0.7072,
+ "step": 772000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.343858033448982e-05,
+ "loss": 0.7647,
+ "step": 773000
+ },
+ {
+ "epoch": 0.77,
+ "learning_rate": 1.3325991869878013e-05,
+ "loss": 0.7978,
+ "step": 774000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.3213804466343421e-05,
+ "loss": 0.8,
+ "step": 775000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.3102019350749528e-05,
+ "loss": 0.8197,
+ "step": 776000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.299063774556042e-05,
+ "loss": 0.8255,
+ "step": 777000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.2879660868827508e-05,
+ "loss": 0.8455,
+ "step": 778000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.2769089934176126e-05,
+ "loss": 0.8561,
+ "step": 779000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.2658926150792322e-05,
+ "loss": 0.8587,
+ "step": 780000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.2549170723409549e-05,
+ "loss": 0.7954,
+ "step": 781000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.243982485229559e-05,
+ "loss": 0.8021,
+ "step": 782000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.233088973323937e-05,
+ "loss": 0.7285,
+ "step": 783000
+ },
+ {
+ "epoch": 0.78,
+ "learning_rate": 1.2222366557537911e-05,
+ "loss": 0.7509,
+ "step": 784000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.2114256511983274e-05,
+ "loss": 0.7451,
+ "step": 785000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.2006560778849578e-05,
+ "loss": 0.7591,
+ "step": 786000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.1899280535880119e-05,
+ "loss": 0.8196,
+ "step": 787000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.1792416956274444e-05,
+ "loss": 0.8173,
+ "step": 788000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.1685971208675539e-05,
+ "loss": 0.8056,
+ "step": 789000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.157994445715706e-05,
+ "loss": 0.8131,
+ "step": 790000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.1474337861210543e-05,
+ "loss": 0.7835,
+ "step": 791000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.1369152575732822e-05,
+ "loss": 0.7698,
+ "step": 792000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.1264389751013326e-05,
+ "loss": 0.7947,
+ "step": 793000
+ },
+ {
+ "epoch": 0.79,
+ "learning_rate": 1.1160050532721528e-05,
+ "loss": 0.7808,
+ "step": 794000
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 1.1056136061894384e-05,
+ "loss": 0.7543,
+ "step": 795000
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 1.095264747492391e-05,
+ "loss": 0.7688,
+ "step": 796000
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 1.0849585903544706e-05,
+ "loss": 0.7812,
+ "step": 797000
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 1.0746952474821614e-05,
+ "loss": 0.7049,
+ "step": 798000
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 1.0644748311137376e-05,
+ "loss": 0.7251,
+ "step": 799000
+ },
+ {
+ "epoch": 0.8,
+ "learning_rate": 1.0542974530180327e-05,
+ "loss": 0.695,
+ "step": 800000
+ },
+ {
+ "epoch": 0.8,
+ "eval_loss": 0.5825644731521606,
+ "eval_runtime": 20.7083,
+ "eval_samples_per_second": 241.45,
+ "eval_steps_per_second": 1.932,
+ "step": 800000
  }
  ],
  "max_steps": 1000000,
  "num_train_epochs": 9223372036854775807,
- "total_flos": 1.2648204730368e+19,
+ "total_flos": 1.34914183790592e+19,
  "trial_name": null,
  "trial_params": null
  }
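
The hunk above appends fifty train records (steps 751000 through 800000) and one eval record to the Trainer's log_history list, and advances epoch, global_step, and total_flos accordingly. As a hedged sketch under the assumption that the checkpoint keeps this standard layout, the following stdlib-only Python reads that progress back out of trainer_state.json using only keys visible in the diff:

# Minimal sketch, assuming the checkpoint layout shown in this commit:
# summarize training progress from trainer_state.json.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(f"epoch {state['epoch']}, step {state['global_step']} of {state['max_steps']}")

# log_history mixes train records (key "loss") and eval records (key "eval_loss").
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]
if train_logs:
    last = train_logs[-1]
    print(f"last train loss {last['loss']} at step {last['step']} (lr {last['learning_rate']})")
if eval_logs:
    last = eval_logs[-1]
    print(f"last eval loss {last['eval_loss']} at step {last['step']}")
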
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:281e1c30b4f4821c0ff87c4a38c66c5325c63ebb8240b3901d44f4cd8f330fc8
- size 3439
+ oid sha256:56bc6d38ac5da7036b4bce8e3d70d84b767d971c5ea2387cc69adc958b0700f9
+ size 3503
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b421ceb551fd425f650a6742be22de59c7f6b297d62820870b0681f67c5a91c4
+ oid sha256:a83020031862aea2e988f120ee4fbfdb9e36868ffeeee41e4196243bad5ad70f
  size 737971755
runs/Feb09_18-20-41_t1v-n-0cfb531e-w-0/1675967137.0478177/events.out.tfevents.1675967137.t1v-n-0cfb531e-w-0.3767571.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1a98c027cf2146aaecfbe0642659eb9d3a2a95a1b4fa3c3d2f9953eef846ee72
+ size 5484
runs/Feb09_18-20-41_t1v-n-0cfb531e-w-0/events.out.tfevents.1675967137.t1v-n-0cfb531e-w-0.3767571.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a89903c3ad93724a201222cc39ca9ad19e4291497e25d97bb164b6fca4688679
+ size 12096
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:281e1c30b4f4821c0ff87c4a38c66c5325c63ebb8240b3901d44f4cd8f330fc8
- size 3439
+ oid sha256:56bc6d38ac5da7036b4bce8e3d70d84b767d971c5ea2387cc69adc958b0700f9
+ size 3503