plip commited on
Commit
b4036da
1 Parent(s): 8bc6e89

Training in progress, step 110000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:229f1f5ff8c8c98bdcec06bdaa6629d9502e6ccb2217aeaa76f4e33e88972e4f
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c0c13c2b4862e7a34ebc8b922768e1e9178a36b674a1308c54623a6b682441d
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68cdcd4cdbc673fcb49958bfe32cced5d0dfd7765e48765f4a191aa568bbef48
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a822dc652f5b5007dd659ff9a97261c9af2e549d16d025cb7bff7d5d1f73941
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b376139e7a9e28339f66d6b916608b35982b8acd6efb16cc7b604be132a5044c
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b376139e7a9e28339f66d6b916608b35982b8acd6efb16cc7b604be132a5044c
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b376139e7a9e28339f66d6b916608b35982b8acd6efb16cc7b604be132a5044c
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b376139e7a9e28339f66d6b916608b35982b8acd6efb16cc7b604be132a5044c
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b376139e7a9e28339f66d6b916608b35982b8acd6efb16cc7b604be132a5044c
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b376139e7a9e28339f66d6b916608b35982b8acd6efb16cc7b604be132a5044c
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b376139e7a9e28339f66d6b916608b35982b8acd6efb16cc7b604be132a5044c
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dcfba93a07c8445e392e88db40749e6e684f371330279ce6dcb90e24daa020a
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b376139e7a9e28339f66d6b916608b35982b8acd6efb16cc7b604be132a5044c
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d85ea74361bfabc4dca40ed2a4dec24f25124d91f625a1176acad7044d70175
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe4bb2f202e1c5bfac6f58d7d7aff54991c6919cce0ee2976f31297f1718992f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.127450980392156,
5
- "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2006,11 +2006,211 @@
2006
  "eval_samples_per_second": 767.786,
2007
  "eval_steps_per_second": 12.285,
2008
  "step": 100000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2009
  }
2010
  ],
2011
  "max_steps": 250000,
2012
  "num_train_epochs": 16,
2013
- "total_flos": 1.6016199656363503e+21,
2014
  "trial_name": null,
2015
  "trial_params": null
2016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.740196078431373,
5
+ "global_step": 110000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2006
  "eval_samples_per_second": 767.786,
2007
  "eval_steps_per_second": 12.285,
2008
  "step": 100000
2009
+ },
2010
+ {
2011
+ "epoch": 6.16,
2012
+ "learning_rate": 0.00042171081131341917,
2013
+ "loss": 0.4877,
2014
+ "step": 100500
2015
+ },
2016
+ {
2017
+ "epoch": 6.19,
2018
+ "learning_rate": 0.00041991636704523497,
2019
+ "loss": 0.4868,
2020
+ "step": 101000
2021
+ },
2022
+ {
2023
+ "epoch": 6.19,
2024
+ "eval_loss": 0.8239555954933167,
2025
+ "eval_runtime": 1.2567,
2026
+ "eval_samples_per_second": 795.735,
2027
+ "eval_steps_per_second": 12.732,
2028
+ "step": 101000
2029
+ },
2030
+ {
2031
+ "epoch": 6.22,
2032
+ "learning_rate": 0.00041811689596287893,
2033
+ "loss": 0.4864,
2034
+ "step": 101500
2035
+ },
2036
+ {
2037
+ "epoch": 6.25,
2038
+ "learning_rate": 0.0004163124767810454,
2039
+ "loss": 0.4861,
2040
+ "step": 102000
2041
+ },
2042
+ {
2043
+ "epoch": 6.25,
2044
+ "eval_loss": 0.8212010264396667,
2045
+ "eval_runtime": 1.2247,
2046
+ "eval_samples_per_second": 816.535,
2047
+ "eval_steps_per_second": 13.065,
2048
+ "step": 102000
2049
+ },
2050
+ {
2051
+ "epoch": 6.28,
2052
+ "learning_rate": 0.00041450318843087506,
2053
+ "loss": 0.4858,
2054
+ "step": 102500
2055
+ },
2056
+ {
2057
+ "epoch": 6.31,
2058
+ "learning_rate": 0.00041268911005650166,
2059
+ "loss": 0.4854,
2060
+ "step": 103000
2061
+ },
2062
+ {
2063
+ "epoch": 6.31,
2064
+ "eval_loss": 0.8242572546005249,
2065
+ "eval_runtime": 1.357,
2066
+ "eval_samples_per_second": 736.942,
2067
+ "eval_steps_per_second": 11.791,
2068
+ "step": 103000
2069
+ },
2070
+ {
2071
+ "epoch": 6.34,
2072
+ "learning_rate": 0.00041087032101159006,
2073
+ "loss": 0.4851,
2074
+ "step": 103500
2075
+ },
2076
+ {
2077
+ "epoch": 6.37,
2078
+ "learning_rate": 0.00040904690085586515,
2079
+ "loss": 0.4847,
2080
+ "step": 104000
2081
+ },
2082
+ {
2083
+ "epoch": 6.37,
2084
+ "eval_loss": 0.8227641582489014,
2085
+ "eval_runtime": 1.3326,
2086
+ "eval_samples_per_second": 750.437,
2087
+ "eval_steps_per_second": 12.007,
2088
+ "step": 104000
2089
+ },
2090
+ {
2091
+ "epoch": 6.4,
2092
+ "learning_rate": 0.0004072189293516316,
2093
+ "loss": 0.4844,
2094
+ "step": 104500
2095
+ },
2096
+ {
2097
+ "epoch": 6.43,
2098
+ "learning_rate": 0.0004053864864602847,
2099
+ "loss": 0.4841,
2100
+ "step": 105000
2101
+ },
2102
+ {
2103
+ "epoch": 6.43,
2104
+ "eval_loss": 0.8184976577758789,
2105
+ "eval_runtime": 1.3395,
2106
+ "eval_samples_per_second": 746.538,
2107
+ "eval_steps_per_second": 11.945,
2108
+ "step": 105000
2109
+ },
2110
+ {
2111
+ "epoch": 6.46,
2112
+ "learning_rate": 0.00040354965233881297,
2113
+ "loss": 0.4846,
2114
+ "step": 105500
2115
+ },
2116
+ {
2117
+ "epoch": 6.5,
2118
+ "learning_rate": 0.0004017085073362913,
2119
+ "loss": 0.4837,
2120
+ "step": 106000
2121
+ },
2122
+ {
2123
+ "epoch": 6.5,
2124
+ "eval_loss": 0.8177208304405212,
2125
+ "eval_runtime": 1.2838,
2126
+ "eval_samples_per_second": 778.932,
2127
+ "eval_steps_per_second": 12.463,
2128
+ "step": 106000
2129
+ },
2130
+ {
2131
+ "epoch": 6.53,
2132
+ "learning_rate": 0.00039986313199036664,
2133
+ "loss": 0.4828,
2134
+ "step": 106500
2135
+ },
2136
+ {
2137
+ "epoch": 6.56,
2138
+ "learning_rate": 0.00039801360702373484,
2139
+ "loss": 0.4827,
2140
+ "step": 107000
2141
+ },
2142
+ {
2143
+ "epoch": 6.56,
2144
+ "eval_loss": 0.8140051364898682,
2145
+ "eval_runtime": 1.3554,
2146
+ "eval_samples_per_second": 737.763,
2147
+ "eval_steps_per_second": 11.804,
2148
+ "step": 107000
2149
+ },
2150
+ {
2151
+ "epoch": 6.59,
2152
+ "learning_rate": 0.00039616001334060954,
2153
+ "loss": 0.4824,
2154
+ "step": 107500
2155
+ },
2156
+ {
2157
+ "epoch": 6.62,
2158
+ "learning_rate": 0.00039430243202318314,
2159
+ "loss": 0.4819,
2160
+ "step": 108000
2161
+ },
2162
+ {
2163
+ "epoch": 6.62,
2164
+ "eval_loss": 0.8147432208061218,
2165
+ "eval_runtime": 1.3502,
2166
+ "eval_samples_per_second": 740.632,
2167
+ "eval_steps_per_second": 11.85,
2168
+ "step": 108000
2169
+ },
2170
+ {
2171
+ "epoch": 6.65,
2172
+ "learning_rate": 0.00039244094432808034,
2173
+ "loss": 0.4816,
2174
+ "step": 108500
2175
+ },
2176
+ {
2177
+ "epoch": 6.68,
2178
+ "learning_rate": 0.0003905756316828033,
2179
+ "loss": 0.4813,
2180
+ "step": 109000
2181
+ },
2182
+ {
2183
+ "epoch": 6.68,
2184
+ "eval_loss": 0.8172094225883484,
2185
+ "eval_runtime": 1.4168,
2186
+ "eval_samples_per_second": 705.811,
2187
+ "eval_steps_per_second": 11.293,
2188
+ "step": 109000
2189
+ },
2190
+ {
2191
+ "epoch": 6.71,
2192
+ "learning_rate": 0.00038870657568216963,
2193
+ "loss": 0.4808,
2194
+ "step": 109500
2195
+ },
2196
+ {
2197
+ "epoch": 6.74,
2198
+ "learning_rate": 0.00038683385808474416,
2199
+ "loss": 0.4807,
2200
+ "step": 110000
2201
+ },
2202
+ {
2203
+ "epoch": 6.74,
2204
+ "eval_loss": 0.8148666024208069,
2205
+ "eval_runtime": 1.2865,
2206
+ "eval_samples_per_second": 777.303,
2207
+ "eval_steps_per_second": 12.437,
2208
+ "step": 110000
2209
  }
2210
  ],
2211
  "max_steps": 250000,
2212
  "num_train_epochs": 16,
2213
+ "total_flos": 1.7617879685021544e+21,
2214
  "trial_name": null,
2215
  "trial_params": null
2216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68cdcd4cdbc673fcb49958bfe32cced5d0dfd7765e48765f4a191aa568bbef48
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a822dc652f5b5007dd659ff9a97261c9af2e549d16d025cb7bff7d5d1f73941
3
  size 25761253