jflotz commited on
Commit
33d542f
·
1 Parent(s): f764013

Training in progress, step 450000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a4ad94b9d5ed920cecf2394888d2a87ad3ee893c3c5ddbd5617ff00b81d3e84
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6931208249dada9481d4f15fa6a8eb29e66faab6c960f940cb42178be058ede5
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c579869ee7eca1f3cfb28c1e8c5c1a2c4c07c47eb9cbbb5f2453c93bc559b64a
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96730bdf9c613274e5002868e7a4d31f7cf6da025343ac9c04b48c36b22d877f
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25a654aa71fcb8c8c7fac3eb694717c26bcb1def578ecd3a5246da103da0b6ce
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0eac835ca5ea6ff1c5bc401311ec38950e11a1e99d93162fecdaf18652f830d
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1894f32b2441ea8820978bbb44f8f2d9ce0a579e669301efbd1655378591798a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc29c434fb0390a8f4f90d65ac745a0b4f381dbd06e857762d450d4a464c7045
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.814418272662383,
5
- "global_step": 440000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -8806,11 +8806,211 @@
8806
  "eval_samples_per_second": 1153.611,
8807
  "eval_steps_per_second": 18.08,
8808
  "step": 440000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8809
  }
8810
  ],
8811
  "max_steps": 500000,
8812
  "num_train_epochs": 12,
8813
- "total_flos": 1.4057178017725262e+22,
8814
  "trial_name": null,
8815
  "trial_params": null
8816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.037473233404711,
5
+ "global_step": 450000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
8806
  "eval_samples_per_second": 1153.611,
8807
  "eval_steps_per_second": 18.08,
8808
  "step": 440000
8809
+ },
8810
+ {
8811
+ "epoch": 9.83,
8812
+ "learning_rate": 2.1083388335824145e-05,
8813
+ "loss": 0.2568,
8814
+ "step": 440500
8815
+ },
8816
+ {
8817
+ "epoch": 9.84,
8818
+ "learning_rate": 2.0900255633978873e-05,
8819
+ "loss": 0.257,
8820
+ "step": 441000
8821
+ },
8822
+ {
8823
+ "epoch": 9.84,
8824
+ "eval_loss": 0.23978090286254883,
8825
+ "eval_runtime": 2.0085,
8826
+ "eval_samples_per_second": 1143.638,
8827
+ "eval_steps_per_second": 17.924,
8828
+ "step": 441000
8829
+ },
8830
+ {
8831
+ "epoch": 9.85,
8832
+ "learning_rate": 2.0718589425453314e-05,
8833
+ "loss": 0.2559,
8834
+ "step": 441500
8835
+ },
8836
+ {
8837
+ "epoch": 9.86,
8838
+ "learning_rate": 2.0538391696920015e-05,
8839
+ "loss": 0.2559,
8840
+ "step": 442000
8841
+ },
8842
+ {
8843
+ "epoch": 9.86,
8844
+ "eval_loss": 0.23992407321929932,
8845
+ "eval_runtime": 1.9856,
8846
+ "eval_samples_per_second": 1156.855,
8847
+ "eval_steps_per_second": 18.131,
8848
+ "step": 442000
8849
+ },
8850
+ {
8851
+ "epoch": 9.87,
8852
+ "learning_rate": 2.035966441899249e-05,
8853
+ "loss": 0.2557,
8854
+ "step": 442500
8855
+ },
8856
+ {
8857
+ "epoch": 9.88,
8858
+ "learning_rate": 2.0182409546203555e-05,
8859
+ "loss": 0.2556,
8860
+ "step": 443000
8861
+ },
8862
+ {
8863
+ "epoch": 9.88,
8864
+ "eval_loss": 0.23771096765995026,
8865
+ "eval_runtime": 1.9588,
8866
+ "eval_samples_per_second": 1172.656,
8867
+ "eval_steps_per_second": 18.379,
8868
+ "step": 443000
8869
+ },
8870
+ {
8871
+ "epoch": 9.89,
8872
+ "learning_rate": 2.000662901698415e-05,
8873
+ "loss": 0.2562,
8874
+ "step": 443500
8875
+ },
8876
+ {
8877
+ "epoch": 9.9,
8878
+ "learning_rate": 1.983232475364195e-05,
8879
+ "loss": 0.2565,
8880
+ "step": 444000
8881
+ },
8882
+ {
8883
+ "epoch": 9.9,
8884
+ "eval_loss": 0.24137504398822784,
8885
+ "eval_runtime": 1.9524,
8886
+ "eval_samples_per_second": 1176.51,
8887
+ "eval_steps_per_second": 18.439,
8888
+ "step": 444000
8889
+ },
8890
+ {
8891
+ "epoch": 9.91,
8892
+ "learning_rate": 1.9659498662340474e-05,
8893
+ "loss": 0.2563,
8894
+ "step": 444500
8895
+ },
8896
+ {
8897
+ "epoch": 9.93,
8898
+ "learning_rate": 1.948815263307819e-05,
8899
+ "loss": 0.2562,
8900
+ "step": 445000
8901
+ },
8902
+ {
8903
+ "epoch": 9.93,
8904
+ "eval_loss": 0.23801474273204803,
8905
+ "eval_runtime": 2.0266,
8906
+ "eval_samples_per_second": 1133.424,
8907
+ "eval_steps_per_second": 17.764,
8908
+ "step": 445000
8909
+ },
8910
+ {
8911
+ "epoch": 9.94,
8912
+ "learning_rate": 1.9318288539667765e-05,
8913
+ "loss": 0.2562,
8914
+ "step": 445500
8915
+ },
8916
+ {
8917
+ "epoch": 9.95,
8918
+ "learning_rate": 1.914990823971574e-05,
8919
+ "loss": 0.2558,
8920
+ "step": 446000
8921
+ },
8922
+ {
8923
+ "epoch": 9.95,
8924
+ "eval_loss": 0.23844870924949646,
8925
+ "eval_runtime": 2.0906,
8926
+ "eval_samples_per_second": 1098.726,
8927
+ "eval_steps_per_second": 17.22,
8928
+ "step": 446000
8929
+ },
8930
+ {
8931
+ "epoch": 9.96,
8932
+ "learning_rate": 1.8983013574602096e-05,
8933
+ "loss": 0.2559,
8934
+ "step": 446500
8935
+ },
8936
+ {
8937
+ "epoch": 9.97,
8938
+ "learning_rate": 1.8817606369460156e-05,
8939
+ "loss": 0.2555,
8940
+ "step": 447000
8941
+ },
8942
+ {
8943
+ "epoch": 9.97,
8944
+ "eval_loss": 0.23887751996517181,
8945
+ "eval_runtime": 2.059,
8946
+ "eval_samples_per_second": 1115.606,
8947
+ "eval_steps_per_second": 17.484,
8948
+ "step": 447000
8949
+ },
8950
+ {
8951
+ "epoch": 9.98,
8952
+ "learning_rate": 1.865368843315663e-05,
8953
+ "loss": 0.2561,
8954
+ "step": 447500
8955
+ },
8956
+ {
8957
+ "epoch": 9.99,
8958
+ "learning_rate": 1.8491261558271762e-05,
8959
+ "loss": 0.2558,
8960
+ "step": 448000
8961
+ },
8962
+ {
8963
+ "epoch": 9.99,
8964
+ "eval_loss": 0.2371719628572464,
8965
+ "eval_runtime": 1.9897,
8966
+ "eval_samples_per_second": 1154.434,
8967
+ "eval_steps_per_second": 18.093,
8968
+ "step": 448000
8969
+ },
8970
+ {
8971
+ "epoch": 10.0,
8972
+ "learning_rate": 1.833032752107986e-05,
8973
+ "loss": 0.256,
8974
+ "step": 448500
8975
+ },
8976
+ {
8977
+ "epoch": 10.02,
8978
+ "learning_rate": 1.817088808152978e-05,
8979
+ "loss": 0.2563,
8980
+ "step": 449000
8981
+ },
8982
+ {
8983
+ "epoch": 10.02,
8984
+ "eval_loss": 0.23607970774173737,
8985
+ "eval_runtime": 2.0104,
8986
+ "eval_samples_per_second": 1142.561,
8987
+ "eval_steps_per_second": 17.907,
8988
+ "step": 449000
8989
+ },
8990
+ {
8991
+ "epoch": 10.03,
8992
+ "learning_rate": 1.801294498322569e-05,
8993
+ "loss": 0.2559,
8994
+ "step": 449500
8995
+ },
8996
+ {
8997
+ "epoch": 10.04,
8998
+ "learning_rate": 1.7856499953407978e-05,
8999
+ "loss": 0.2555,
9000
+ "step": 450000
9001
+ },
9002
+ {
9003
+ "epoch": 10.04,
9004
+ "eval_loss": 0.238793283700943,
9005
+ "eval_runtime": 2.0158,
9006
+ "eval_samples_per_second": 1139.483,
9007
+ "eval_steps_per_second": 17.859,
9008
+ "step": 450000
9009
  }
9010
  ],
9011
  "max_steps": 500000,
9012
  "num_train_epochs": 12,
9013
+ "total_flos": 1.4376636702238347e+22,
9014
  "trial_name": null,
9015
  "trial_params": null
9016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c579869ee7eca1f3cfb28c1e8c5c1a2c4c07c47eb9cbbb5f2453c93bc559b64a
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96730bdf9c613274e5002868e7a4d31f7cf6da025343ac9c04b48c36b22d877f
3
  size 102501541