marinone94 committed on
Commit
216b1e3
•
1 Parent(s): 57a6490

Training in progress, step 350

Browse files
{checkpoint-150 → checkpoint-350}/config.json RENAMED
File without changes
{checkpoint-150 → checkpoint-350}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9f8e6594c4e496478e4866e70b042c6a88c87612acaf661f65750f58a301e4f
3
- size 2490361937
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:646667c125c12f737d1ffee17d6371268d6575f3796018a92eb4102f5f1fff0b
3
+ size 2490362385
{checkpoint-150 → checkpoint-350}/preprocessor_config.json RENAMED
File without changes
{checkpoint-150 → checkpoint-350}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:386439db8a06ec7bb8279d96099b6db5fee78683933dd8e79efdb684fee3e2c0
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0d76767012f4da5dc0d5f53230af28368c23a384f4a2b862a3afb09aea2dc6d
3
  size 1262075377
{checkpoint-150 → checkpoint-350}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f704415bae2d9ff4df8dc44b817ddfc862d4dd0c72e611cb60926dc7d7e582d1
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4671c6ef20752fdeca41dd978833212c15422e660369baad61a24c693eba960d
3
  size 14567
{checkpoint-150 → checkpoint-350}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:968c71c75fc57c68b8d331de92abbf974f36bbab2c395cec452f6a60d0999a20
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11a38190a29b2e515a160c453a45f3b2acc23e2c8c2240009e053ed0dbf017f0
3
  size 559
{checkpoint-150 → checkpoint-350}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dac0ff029249bda5eded6c55167fca809c071e881ce886494b4c2d2d40b64c39
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68cb16be7c67302d24fc36708cbe6b5ff6ca823143d0ed4ccd59b12de9852185
3
  size 623
{checkpoint-150 → checkpoint-350}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.43509789702683105,
5
- "global_step": 150,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -123,11 +123,167 @@
123
  "eval_steps_per_second": 4.573,
124
  "eval_wer": 1.0,
125
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  }
127
  ],
128
  "max_steps": 688,
129
  "num_train_epochs": 2,
130
- "total_flos": 4.4091799296804864e+17,
131
  "trial_name": null,
132
  "trial_params": null
133
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0174039158810733,
5
+ "global_step": 350,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
123
  "eval_steps_per_second": 4.573,
124
  "eval_wer": 1.0,
125
  "step": 150
126
+ },
127
+ {
128
+ "epoch": 0.46,
129
+ "learning_rate": 5.9393712574850293e-05,
130
+ "loss": 3.0588,
131
+ "step": 160
132
+ },
133
+ {
134
+ "epoch": 0.49,
135
+ "learning_rate": 5.827095808383233e-05,
136
+ "loss": 3.0477,
137
+ "step": 170
138
+ },
139
+ {
140
+ "epoch": 0.52,
141
+ "learning_rate": 5.714820359281436e-05,
142
+ "loss": 3.045,
143
+ "step": 180
144
+ },
145
+ {
146
+ "epoch": 0.55,
147
+ "learning_rate": 5.602544910179641e-05,
148
+ "loss": 3.0439,
149
+ "step": 190
150
+ },
151
+ {
152
+ "epoch": 0.58,
153
+ "learning_rate": 5.490269461077844e-05,
154
+ "loss": 3.0846,
155
+ "step": 200
156
+ },
157
+ {
158
+ "epoch": 0.58,
159
+ "eval_loss": 3.079519271850586,
160
+ "eval_runtime": 125.7215,
161
+ "eval_samples_per_second": 36.748,
162
+ "eval_steps_per_second": 4.597,
163
+ "eval_wer": 1.0,
164
+ "step": 200
165
+ },
166
+ {
167
+ "epoch": 0.61,
168
+ "learning_rate": 5.3779940119760477e-05,
169
+ "loss": 3.0512,
170
+ "step": 210
171
+ },
172
+ {
173
+ "epoch": 0.64,
174
+ "learning_rate": 5.265718562874251e-05,
175
+ "loss": 3.0143,
176
+ "step": 220
177
+ },
178
+ {
179
+ "epoch": 0.67,
180
+ "learning_rate": 5.1534431137724546e-05,
181
+ "loss": 3.0387,
182
+ "step": 230
183
+ },
184
+ {
185
+ "epoch": 0.7,
186
+ "learning_rate": 5.0411676646706584e-05,
187
+ "loss": 3.0311,
188
+ "step": 240
189
+ },
190
+ {
191
+ "epoch": 0.73,
192
+ "learning_rate": 4.9288922155688615e-05,
193
+ "loss": 3.0417,
194
+ "step": 250
195
+ },
196
+ {
197
+ "epoch": 0.73,
198
+ "eval_loss": 3.069390058517456,
199
+ "eval_runtime": 125.7339,
200
+ "eval_samples_per_second": 36.744,
201
+ "eval_steps_per_second": 4.597,
202
+ "eval_wer": 1.0,
203
+ "step": 250
204
+ },
205
+ {
206
+ "epoch": 0.75,
207
+ "learning_rate": 4.816616766467066e-05,
208
+ "loss": 3.0219,
209
+ "step": 260
210
+ },
211
+ {
212
+ "epoch": 0.78,
213
+ "learning_rate": 4.704341317365269e-05,
214
+ "loss": 3.0194,
215
+ "step": 270
216
+ },
217
+ {
218
+ "epoch": 0.81,
219
+ "learning_rate": 4.592065868263473e-05,
220
+ "loss": 2.9974,
221
+ "step": 280
222
+ },
223
+ {
224
+ "epoch": 0.84,
225
+ "learning_rate": 4.479790419161676e-05,
226
+ "loss": 2.9996,
227
+ "step": 290
228
+ },
229
+ {
230
+ "epoch": 0.87,
231
+ "learning_rate": 4.36751497005988e-05,
232
+ "loss": 3.0016,
233
+ "step": 300
234
+ },
235
+ {
236
+ "epoch": 0.87,
237
+ "eval_loss": 3.0347490310668945,
238
+ "eval_runtime": 132.3039,
239
+ "eval_samples_per_second": 34.92,
240
+ "eval_steps_per_second": 4.369,
241
+ "eval_wer": 1.0,
242
+ "step": 300
243
+ },
244
+ {
245
+ "epoch": 0.9,
246
+ "learning_rate": 4.255239520958083e-05,
247
+ "loss": 3.0052,
248
+ "step": 310
249
+ },
250
+ {
251
+ "epoch": 0.93,
252
+ "learning_rate": 4.142964071856287e-05,
253
+ "loss": 2.9826,
254
+ "step": 320
255
+ },
256
+ {
257
+ "epoch": 0.96,
258
+ "learning_rate": 4.030688622754491e-05,
259
+ "loss": 2.9747,
260
+ "step": 330
261
+ },
262
+ {
263
+ "epoch": 0.99,
264
+ "learning_rate": 3.918413173652694e-05,
265
+ "loss": 2.9617,
266
+ "step": 340
267
+ },
268
+ {
269
+ "epoch": 1.02,
270
+ "learning_rate": 3.806137724550898e-05,
271
+ "loss": 3.2053,
272
+ "step": 350
273
+ },
274
+ {
275
+ "epoch": 1.02,
276
+ "eval_loss": 2.984886407852173,
277
+ "eval_runtime": 149.1508,
278
+ "eval_samples_per_second": 30.975,
279
+ "eval_steps_per_second": 3.875,
280
+ "eval_wer": 1.0,
281
+ "step": 350
282
  }
283
  ],
284
  "max_steps": 688,
285
  "num_train_epochs": 2,
286
+ "total_flos": 1.0437262031798554e+18,
287
  "trial_name": null,
288
  "trial_params": null
289
  }
{checkpoint-150 → checkpoint-350}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8edb565b658376cb9cf7a44bbc1bfea5a26ebcd17d3f36739a8535e85a13dcee
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0d76767012f4da5dc0d5f53230af28368c23a384f4a2b862a3afb09aea2dc6d
3
  size 1262075377