plip commited on
Commit
6faaef4
1 Parent(s): e238422

Training in progress, step 200000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba65516926b4ff6cdfb50443e11a434cb503c7c65e74dc6127694de917095dae
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58f5bbc2fa5a6564ba72b2ae51852d2cf5c1476db0b209d90ec27efd86c1832a
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40af1c76beae3fcf68a183dcf22a7c21a89e9eb4f6548fe295f14d1acc603f3a
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:814e92d04e78b6c50be49334aded8445cc7160900174a209d8b6a93d4efe04ca
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4761b20aad223fbddcacbd5ca8449487e62b55006b411c219bef6ce43077cf88
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4761b20aad223fbddcacbd5ca8449487e62b55006b411c219bef6ce43077cf88
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4761b20aad223fbddcacbd5ca8449487e62b55006b411c219bef6ce43077cf88
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4761b20aad223fbddcacbd5ca8449487e62b55006b411c219bef6ce43077cf88
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4761b20aad223fbddcacbd5ca8449487e62b55006b411c219bef6ce43077cf88
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4761b20aad223fbddcacbd5ca8449487e62b55006b411c219bef6ce43077cf88
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4761b20aad223fbddcacbd5ca8449487e62b55006b411c219bef6ce43077cf88
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:31b742d11ad36723027991d2089e333cb5ecac7a190f975b5e786c90c8a60b34
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4761b20aad223fbddcacbd5ca8449487e62b55006b411c219bef6ce43077cf88
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:196e9b55e2db27c384076a5416088da2b3e045d13b4c3f579694349cd8bb530e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c550723a62ae0c4012004470c1ebd8187b99ec43cbd02d108f19a1f1901da5d8
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 11.642156862745098,
5
- "global_step": 190000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3806,11 +3806,211 @@
3806
  "eval_samples_per_second": 775.752,
3807
  "eval_steps_per_second": 12.412,
3808
  "step": 190000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3809
  }
3810
  ],
3811
  "max_steps": 250000,
3812
  "num_train_epochs": 16,
3813
- "total_flos": 3.0430819389105116e+21,
3814
  "trial_name": null,
3815
  "trial_params": null
3816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 12.254901960784313,
5
+ "global_step": 200000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
3806
  "eval_samples_per_second": 775.752,
3807
  "eval_steps_per_second": 12.412,
3808
  "step": 190000
3809
+ },
3810
+ {
3811
+ "epoch": 11.67,
3812
+ "learning_rate": 9.67486925578087e-05,
3813
+ "loss": 0.4449,
3814
+ "step": 190500
3815
+ },
3816
+ {
3817
+ "epoch": 11.7,
3818
+ "learning_rate": 9.537134606282964e-05,
3819
+ "loss": 0.4448,
3820
+ "step": 191000
3821
+ },
3822
+ {
3823
+ "epoch": 11.7,
3824
+ "eval_loss": 0.8036643266677856,
3825
+ "eval_runtime": 1.3143,
3826
+ "eval_samples_per_second": 760.834,
3827
+ "eval_steps_per_second": 12.173,
3828
+ "step": 191000
3829
+ },
3830
+ {
3831
+ "epoch": 11.73,
3832
+ "learning_rate": 9.400316940427652e-05,
3833
+ "loss": 0.4447,
3834
+ "step": 191500
3835
+ },
3836
+ {
3837
+ "epoch": 11.76,
3838
+ "learning_rate": 9.264422243062844e-05,
3839
+ "loss": 0.4448,
3840
+ "step": 192000
3841
+ },
3842
+ {
3843
+ "epoch": 11.76,
3844
+ "eval_loss": 0.8037863969802856,
3845
+ "eval_runtime": 1.3425,
3846
+ "eval_samples_per_second": 744.863,
3847
+ "eval_steps_per_second": 11.918,
3848
+ "step": 192000
3849
+ },
3850
+ {
3851
+ "epoch": 11.8,
3852
+ "learning_rate": 9.129456458662876e-05,
3853
+ "loss": 0.4445,
3854
+ "step": 192500
3855
+ },
3856
+ {
3857
+ "epoch": 11.83,
3858
+ "learning_rate": 8.995425491068365e-05,
3859
+ "loss": 0.4445,
3860
+ "step": 193000
3861
+ },
3862
+ {
3863
+ "epoch": 11.83,
3864
+ "eval_loss": 0.8010460734367371,
3865
+ "eval_runtime": 1.3665,
3866
+ "eval_samples_per_second": 731.803,
3867
+ "eval_steps_per_second": 11.709,
3868
+ "step": 193000
3869
+ },
3870
+ {
3871
+ "epoch": 11.86,
3872
+ "learning_rate": 8.862335203228025e-05,
3873
+ "loss": 0.4444,
3874
+ "step": 193500
3875
+ },
3876
+ {
3877
+ "epoch": 11.89,
3878
+ "learning_rate": 8.73019141694222e-05,
3879
+ "loss": 0.4442,
3880
+ "step": 194000
3881
+ },
3882
+ {
3883
+ "epoch": 11.89,
3884
+ "eval_loss": 0.7977059483528137,
3885
+ "eval_runtime": 1.3628,
3886
+ "eval_samples_per_second": 733.81,
3887
+ "eval_steps_per_second": 11.741,
3888
+ "step": 194000
3889
+ },
3890
+ {
3891
+ "epoch": 11.92,
3892
+ "learning_rate": 8.598999912608229e-05,
3893
+ "loss": 0.4442,
3894
+ "step": 194500
3895
+ },
3896
+ {
3897
+ "epoch": 11.95,
3898
+ "learning_rate": 8.468766428967468e-05,
3899
+ "loss": 0.4443,
3900
+ "step": 195000
3901
+ },
3902
+ {
3903
+ "epoch": 11.95,
3904
+ "eval_loss": 0.80078125,
3905
+ "eval_runtime": 1.316,
3906
+ "eval_samples_per_second": 759.905,
3907
+ "eval_steps_per_second": 12.158,
3908
+ "step": 195000
3909
+ },
3910
+ {
3911
+ "epoch": 11.98,
3912
+ "learning_rate": 8.339496662854397e-05,
3913
+ "loss": 0.444,
3914
+ "step": 195500
3915
+ },
3916
+ {
3917
+ "epoch": 12.01,
3918
+ "learning_rate": 8.211196268947367e-05,
3919
+ "loss": 0.4441,
3920
+ "step": 196000
3921
+ },
3922
+ {
3923
+ "epoch": 12.01,
3924
+ "eval_loss": 0.8048492670059204,
3925
+ "eval_runtime": 1.3312,
3926
+ "eval_samples_per_second": 751.193,
3927
+ "eval_steps_per_second": 12.019,
3928
+ "step": 196000
3929
+ },
3930
+ {
3931
+ "epoch": 12.04,
3932
+ "learning_rate": 8.083870859521251e-05,
3933
+ "loss": 0.4441,
3934
+ "step": 196500
3935
+ },
3936
+ {
3937
+ "epoch": 12.07,
3938
+ "learning_rate": 7.95752600420192e-05,
3939
+ "loss": 0.4439,
3940
+ "step": 197000
3941
+ },
3942
+ {
3943
+ "epoch": 12.07,
3944
+ "eval_loss": 0.8033810257911682,
3945
+ "eval_runtime": 1.3277,
3946
+ "eval_samples_per_second": 753.203,
3947
+ "eval_steps_per_second": 12.051,
3948
+ "step": 197000
3949
+ },
3950
+ {
3951
+ "epoch": 12.1,
3952
+ "learning_rate": 7.832167229722666e-05,
3953
+ "loss": 0.4438,
3954
+ "step": 197500
3955
+ },
3956
+ {
3957
+ "epoch": 12.13,
3958
+ "learning_rate": 7.707800019682362e-05,
3959
+ "loss": 0.4438,
3960
+ "step": 198000
3961
+ },
3962
+ {
3963
+ "epoch": 12.13,
3964
+ "eval_loss": 0.8051833510398865,
3965
+ "eval_runtime": 1.2928,
3966
+ "eval_samples_per_second": 773.488,
3967
+ "eval_steps_per_second": 12.376,
3968
+ "step": 198000
3969
+ },
3970
+ {
3971
+ "epoch": 12.16,
3972
+ "learning_rate": 7.5844298143057e-05,
3973
+ "loss": 0.4437,
3974
+ "step": 198500
3975
+ },
3976
+ {
3977
+ "epoch": 12.19,
3978
+ "learning_rate": 7.462062010205106e-05,
3979
+ "loss": 0.4437,
3980
+ "step": 199000
3981
+ },
3982
+ {
3983
+ "epoch": 12.19,
3984
+ "eval_loss": 0.8041102290153503,
3985
+ "eval_runtime": 1.3104,
3986
+ "eval_samples_per_second": 763.133,
3987
+ "eval_steps_per_second": 12.21,
3988
+ "step": 199000
3989
+ },
3990
+ {
3991
+ "epoch": 12.22,
3992
+ "learning_rate": 7.340701960144751e-05,
3993
+ "loss": 0.4437,
3994
+ "step": 199500
3995
+ },
3996
+ {
3997
+ "epoch": 12.25,
3998
+ "learning_rate": 7.220354972806392e-05,
3999
+ "loss": 0.4434,
4000
+ "step": 200000
4001
+ },
4002
+ {
4003
+ "epoch": 12.25,
4004
+ "eval_loss": 0.8000948429107666,
4005
+ "eval_runtime": 1.2692,
4006
+ "eval_samples_per_second": 787.882,
4007
+ "eval_steps_per_second": 12.606,
4008
+ "step": 200000
4009
  }
4010
  ],
4011
  "max_steps": 250000,
4012
  "num_train_epochs": 16,
4013
+ "total_flos": 3.2032399312727006e+21,
4014
  "trial_name": null,
4015
  "trial_params": null
4016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40af1c76beae3fcf68a183dcf22a7c21a89e9eb4f6548fe295f14d1acc603f3a
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:814e92d04e78b6c50be49334aded8445cc7160900174a209d8b6a93d4efe04ca
3
  size 25761253