plip commited on
Commit
8bc6e89
1 Parent(s): 5061a60

Training in progress, step 100000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8be6802b9d992ac2c3994565edfc5567d3f523032a5e6e5411ff08074758c58f
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:229f1f5ff8c8c98bdcec06bdaa6629d9502e6ccb2217aeaa76f4e33e88972e4f
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0ed0a890cada09375208ef03bdfdd6d63ca0c33270b092aa04ab6ca525c88e1
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68cdcd4cdbc673fcb49958bfe32cced5d0dfd7765e48765f4a191aa568bbef48
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6f8570c2dac95236cdc5d351a3c8ca55f21ec8e5bc0a65475a416cca474cb1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6f8570c2dac95236cdc5d351a3c8ca55f21ec8e5bc0a65475a416cca474cb1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6f8570c2dac95236cdc5d351a3c8ca55f21ec8e5bc0a65475a416cca474cb1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6f8570c2dac95236cdc5d351a3c8ca55f21ec8e5bc0a65475a416cca474cb1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6f8570c2dac95236cdc5d351a3c8ca55f21ec8e5bc0a65475a416cca474cb1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6f8570c2dac95236cdc5d351a3c8ca55f21ec8e5bc0a65475a416cca474cb1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6f8570c2dac95236cdc5d351a3c8ca55f21ec8e5bc0a65475a416cca474cb1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c6f8570c2dac95236cdc5d351a3c8ca55f21ec8e5bc0a65475a416cca474cb1
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36f11c380127c9b2248f3452b83eb7e7a6efc224b0b84b63651e3db0e819c91c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d85ea74361bfabc4dca40ed2a4dec24f25124d91f625a1176acad7044d70175
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.514705882352941,
5
- "global_step": 90000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1806,11 +1806,211 @@
1806
  "eval_samples_per_second": 793.671,
1807
  "eval_steps_per_second": 12.699,
1808
  "step": 90000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1809
  }
1810
  ],
1811
  "max_steps": 250000,
1812
  "num_train_epochs": 16,
1813
- "total_flos": 1.4414619732741613e+21,
1814
  "trial_name": null,
1815
  "trial_params": null
1816
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.127450980392156,
5
+ "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1806
  "eval_samples_per_second": 793.671,
1807
  "eval_steps_per_second": 12.699,
1808
  "step": 90000
1809
+ },
1810
+ {
1811
+ "epoch": 5.55,
1812
+ "learning_rate": 0.00045642490670472436,
1813
+ "loss": 0.4947,
1814
+ "step": 90500
1815
+ },
1816
+ {
1817
+ "epoch": 5.58,
1818
+ "learning_rate": 0.0004547471653280225,
1819
+ "loss": 0.4946,
1820
+ "step": 91000
1821
+ },
1822
+ {
1823
+ "epoch": 5.58,
1824
+ "eval_loss": 0.8400516510009766,
1825
+ "eval_runtime": 1.2358,
1826
+ "eval_samples_per_second": 809.182,
1827
+ "eval_steps_per_second": 12.947,
1828
+ "step": 91000
1829
+ },
1830
+ {
1831
+ "epoch": 5.61,
1832
+ "learning_rate": 0.00045306287352519543,
1833
+ "loss": 0.4939,
1834
+ "step": 91500
1835
+ },
1836
+ {
1837
+ "epoch": 5.64,
1838
+ "learning_rate": 0.00045137210497262333,
1839
+ "loss": 0.4935,
1840
+ "step": 92000
1841
+ },
1842
+ {
1843
+ "epoch": 5.64,
1844
+ "eval_loss": 0.8373098969459534,
1845
+ "eval_runtime": 1.3142,
1846
+ "eval_samples_per_second": 760.929,
1847
+ "eval_steps_per_second": 12.175,
1848
+ "step": 92000
1849
+ },
1850
+ {
1851
+ "epoch": 5.67,
1852
+ "learning_rate": 0.0004496749336299999,
1853
+ "loss": 0.4931,
1854
+ "step": 92500
1855
+ },
1856
+ {
1857
+ "epoch": 5.7,
1858
+ "learning_rate": 0.0004479714337370977,
1859
+ "loss": 0.4929,
1860
+ "step": 93000
1861
+ },
1862
+ {
1863
+ "epoch": 5.7,
1864
+ "eval_loss": 0.840123176574707,
1865
+ "eval_runtime": 1.2896,
1866
+ "eval_samples_per_second": 775.451,
1867
+ "eval_steps_per_second": 12.407,
1868
+ "step": 93000
1869
+ },
1870
+ {
1871
+ "epoch": 5.73,
1872
+ "learning_rate": 0.00044626167981052036,
1873
+ "loss": 0.4924,
1874
+ "step": 93500
1875
+ },
1876
+ {
1877
+ "epoch": 5.76,
1878
+ "learning_rate": 0.00044454574664044404,
1879
+ "loss": 0.492,
1880
+ "step": 94000
1881
+ },
1882
+ {
1883
+ "epoch": 5.76,
1884
+ "eval_loss": 0.8355880379676819,
1885
+ "eval_runtime": 1.2671,
1886
+ "eval_samples_per_second": 789.19,
1887
+ "eval_steps_per_second": 12.627,
1888
+ "step": 94000
1889
+ },
1890
+ {
1891
+ "epoch": 5.79,
1892
+ "learning_rate": 0.000442823709287344,
1893
+ "loss": 0.4916,
1894
+ "step": 94500
1895
+ },
1896
+ {
1897
+ "epoch": 5.82,
1898
+ "learning_rate": 0.0004410956430787129,
1899
+ "loss": 0.4912,
1900
+ "step": 95000
1901
+ },
1902
+ {
1903
+ "epoch": 5.82,
1904
+ "eval_loss": 0.8333644866943359,
1905
+ "eval_runtime": 1.299,
1906
+ "eval_samples_per_second": 769.834,
1907
+ "eval_steps_per_second": 12.317,
1908
+ "step": 95000
1909
+ },
1910
+ {
1911
+ "epoch": 5.85,
1912
+ "learning_rate": 0.0004393616236057647,
1913
+ "loss": 0.4912,
1914
+ "step": 95500
1915
+ },
1916
+ {
1917
+ "epoch": 5.88,
1918
+ "learning_rate": 0.00043762172672012875,
1919
+ "loss": 0.4904,
1920
+ "step": 96000
1921
+ },
1922
+ {
1923
+ "epoch": 5.88,
1924
+ "eval_loss": 0.8280515074729919,
1925
+ "eval_runtime": 1.2803,
1926
+ "eval_samples_per_second": 781.038,
1927
+ "eval_steps_per_second": 12.497,
1928
+ "step": 96000
1929
+ },
1930
+ {
1931
+ "epoch": 5.91,
1932
+ "learning_rate": 0.0004358760285305312,
1933
+ "loss": 0.4901,
1934
+ "step": 96500
1935
+ },
1936
+ {
1937
+ "epoch": 5.94,
1938
+ "learning_rate": 0.0004341246053994663,
1939
+ "loss": 0.4898,
1940
+ "step": 97000
1941
+ },
1942
+ {
1943
+ "epoch": 5.94,
1944
+ "eval_loss": 0.8338386416435242,
1945
+ "eval_runtime": 1.3216,
1946
+ "eval_samples_per_second": 756.685,
1947
+ "eval_steps_per_second": 12.107,
1948
+ "step": 97000
1949
+ },
1950
+ {
1951
+ "epoch": 5.97,
1952
+ "learning_rate": 0.00043236753393985534,
1953
+ "loss": 0.4892,
1954
+ "step": 97500
1955
+ },
1956
+ {
1957
+ "epoch": 6.0,
1958
+ "learning_rate": 0.0004306048910116964,
1959
+ "loss": 0.4891,
1960
+ "step": 98000
1961
+ },
1962
+ {
1963
+ "epoch": 6.0,
1964
+ "eval_loss": 0.8300430774688721,
1965
+ "eval_runtime": 1.316,
1966
+ "eval_samples_per_second": 759.879,
1967
+ "eval_steps_per_second": 12.158,
1968
+ "step": 98000
1969
+ },
1970
+ {
1971
+ "epoch": 6.04,
1972
+ "learning_rate": 0.0004288367537187012,
1973
+ "loss": 0.4887,
1974
+ "step": 98500
1975
+ },
1976
+ {
1977
+ "epoch": 6.07,
1978
+ "learning_rate": 0.00042706319940492284,
1979
+ "loss": 0.4882,
1980
+ "step": 99000
1981
+ },
1982
+ {
1983
+ "epoch": 6.07,
1984
+ "eval_loss": 0.8262238502502441,
1985
+ "eval_runtime": 1.3745,
1986
+ "eval_samples_per_second": 727.548,
1987
+ "eval_steps_per_second": 11.641,
1988
+ "step": 99000
1989
+ },
1990
+ {
1991
+ "epoch": 6.1,
1992
+ "learning_rate": 0.00042528430565137254,
1993
+ "loss": 0.488,
1994
+ "step": 99500
1995
+ },
1996
+ {
1997
+ "epoch": 6.13,
1998
+ "learning_rate": 0.00042350015027262593,
1999
+ "loss": 0.4876,
2000
+ "step": 100000
2001
+ },
2002
+ {
2003
+ "epoch": 6.13,
2004
+ "eval_loss": 0.8171582221984863,
2005
+ "eval_runtime": 1.3024,
2006
+ "eval_samples_per_second": 767.786,
2007
+ "eval_steps_per_second": 12.285,
2008
+ "step": 100000
2009
  }
2010
  ],
2011
  "max_steps": 250000,
2012
  "num_train_epochs": 16,
2013
+ "total_flos": 1.6016199656363503e+21,
2014
  "trial_name": null,
2015
  "trial_params": null
2016
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0ed0a890cada09375208ef03bdfdd6d63ca0c33270b092aa04ab6ca525c88e1
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68cdcd4cdbc673fcb49958bfe32cced5d0dfd7765e48765f4a191aa568bbef48
3
  size 25761253