anuragshas committed on
Commit
b9b42b4
1 Parent(s): 68a6e57

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +15 -0
  2. eval_results.json +10 -0
  3. train_results.json +8 -0
  4. trainer_state.json +751 -0
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 49.95,
3
+ "eval_cer": 0.06599821087248926,
4
+ "eval_loss": 0.22972388565540314,
5
+ "eval_runtime": 384.1717,
6
+ "eval_samples": 7748,
7
+ "eval_samples_per_second": 20.168,
8
+ "eval_steps_per_second": 0.318,
9
+ "eval_wer": 0.28497263925915534,
10
+ "train_loss": 1.5808194972818148,
11
+ "train_runtime": 72373.5093,
12
+ "train_samples": 22251,
13
+ "train_samples_per_second": 15.373,
14
+ "train_steps_per_second": 0.12
15
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 49.95,
3
+ "eval_cer": 0.06599821087248926,
4
+ "eval_loss": 0.22972388565540314,
5
+ "eval_runtime": 384.1717,
6
+ "eval_samples": 7748,
7
+ "eval_samples_per_second": 20.168,
8
+ "eval_steps_per_second": 0.318,
9
+ "eval_wer": 0.28497263925915534
10
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 49.95,
3
+ "train_loss": 1.5808194972818148,
4
+ "train_runtime": 72373.5093,
5
+ "train_samples": 22251,
6
+ "train_samples_per_second": 15.373,
7
+ "train_steps_per_second": 0.12
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,751 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 49.95402298850575,
5
+ "global_step": 8692,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.57,
12
+ "learning_rate": 9.123417721518986e-06,
13
+ "loss": 14.6554,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 1.15,
18
+ "learning_rate": 1.766772151898734e-05,
19
+ "loss": 6.292,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 1.72,
24
+ "learning_rate": 2.6212025316455694e-05,
25
+ "loss": 4.5602,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 2.3,
30
+ "learning_rate": 3.475632911392405e-05,
31
+ "loss": 3.675,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 2.3,
36
+ "eval_cer": 1.0,
37
+ "eval_loss": 3.505185604095459,
38
+ "eval_runtime": 388.8147,
39
+ "eval_samples_per_second": 19.927,
40
+ "eval_steps_per_second": 0.314,
41
+ "eval_wer": 1.0,
42
+ "step": 400
43
+ },
44
+ {
45
+ "epoch": 2.87,
46
+ "learning_rate": 4.3300632911392406e-05,
47
+ "loss": 3.4053,
48
+ "step": 500
49
+ },
50
+ {
51
+ "epoch": 3.45,
52
+ "learning_rate": 5.184493670886076e-05,
53
+ "loss": 3.3269,
54
+ "step": 600
55
+ },
56
+ {
57
+ "epoch": 4.02,
58
+ "learning_rate": 6.038924050632911e-05,
59
+ "loss": 3.2702,
60
+ "step": 700
61
+ },
62
+ {
63
+ "epoch": 4.6,
64
+ "learning_rate": 6.893354430379747e-05,
65
+ "loss": 3.0446,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 4.6,
70
+ "eval_cer": 0.52146416703672,
71
+ "eval_loss": 2.2759008407592773,
72
+ "eval_runtime": 391.3771,
73
+ "eval_samples_per_second": 19.797,
74
+ "eval_steps_per_second": 0.312,
75
+ "eval_wer": 1.0052476497825171,
76
+ "step": 800
77
+ },
78
+ {
79
+ "epoch": 5.17,
80
+ "learning_rate": 7.5e-05,
81
+ "loss": 2.3045,
82
+ "step": 900
83
+ },
84
+ {
85
+ "epoch": 5.75,
86
+ "learning_rate": 7.5e-05,
87
+ "loss": 1.9583,
88
+ "step": 1000
89
+ },
90
+ {
91
+ "epoch": 6.32,
92
+ "learning_rate": 7.5e-05,
93
+ "loss": 1.8142,
94
+ "step": 1100
95
+ },
96
+ {
97
+ "epoch": 6.9,
98
+ "learning_rate": 7.5e-05,
99
+ "loss": 1.7276,
100
+ "step": 1200
101
+ },
102
+ {
103
+ "epoch": 6.9,
104
+ "eval_cer": 0.19686409281152592,
105
+ "eval_loss": 0.7083391547203064,
106
+ "eval_runtime": 386.6895,
107
+ "eval_samples_per_second": 20.037,
108
+ "eval_steps_per_second": 0.315,
109
+ "eval_wer": 0.6697067489827417,
110
+ "step": 1200
111
+ },
112
+ {
113
+ "epoch": 7.47,
114
+ "learning_rate": 7.5e-05,
115
+ "loss": 1.6571,
116
+ "step": 1300
117
+ },
118
+ {
119
+ "epoch": 8.05,
120
+ "learning_rate": 7.5e-05,
121
+ "loss": 1.6004,
122
+ "step": 1400
123
+ },
124
+ {
125
+ "epoch": 8.62,
126
+ "learning_rate": 7.5e-05,
127
+ "loss": 1.5572,
128
+ "step": 1500
129
+ },
130
+ {
131
+ "epoch": 9.2,
132
+ "learning_rate": 7.5e-05,
133
+ "loss": 1.5171,
134
+ "step": 1600
135
+ },
136
+ {
137
+ "epoch": 9.2,
138
+ "eval_cer": 0.15683826416017196,
139
+ "eval_loss": 0.5328246355056763,
140
+ "eval_runtime": 388.9054,
141
+ "eval_samples_per_second": 19.923,
142
+ "eval_steps_per_second": 0.314,
143
+ "eval_wer": 0.5733408166128806,
144
+ "step": 1600
145
+ },
146
+ {
147
+ "epoch": 9.77,
148
+ "learning_rate": 7.5e-05,
149
+ "loss": 1.4951,
150
+ "step": 1700
151
+ },
152
+ {
153
+ "epoch": 10.34,
154
+ "learning_rate": 7.5e-05,
155
+ "loss": 1.4553,
156
+ "step": 1800
157
+ },
158
+ {
159
+ "epoch": 10.92,
160
+ "learning_rate": 7.5e-05,
161
+ "loss": 1.4348,
162
+ "step": 1900
163
+ },
164
+ {
165
+ "epoch": 11.49,
166
+ "learning_rate": 7.5e-05,
167
+ "loss": 1.4176,
168
+ "step": 2000
169
+ },
170
+ {
171
+ "epoch": 11.49,
172
+ "eval_cer": 0.13810176574442937,
173
+ "eval_loss": 0.4571371376514435,
174
+ "eval_runtime": 390.2121,
175
+ "eval_samples_per_second": 19.856,
176
+ "eval_steps_per_second": 0.313,
177
+ "eval_wer": 0.5160656657780273,
178
+ "step": 2000
179
+ },
180
+ {
181
+ "epoch": 12.07,
182
+ "learning_rate": 7.5e-05,
183
+ "loss": 1.3988,
184
+ "step": 2100
185
+ },
186
+ {
187
+ "epoch": 12.64,
188
+ "learning_rate": 7.5e-05,
189
+ "loss": 1.3666,
190
+ "step": 2200
191
+ },
192
+ {
193
+ "epoch": 13.22,
194
+ "learning_rate": 7.5e-05,
195
+ "loss": 1.3585,
196
+ "step": 2300
197
+ },
198
+ {
199
+ "epoch": 13.79,
200
+ "learning_rate": 7.5e-05,
201
+ "loss": 1.343,
202
+ "step": 2400
203
+ },
204
+ {
205
+ "epoch": 13.79,
206
+ "eval_cer": 0.11596506696356744,
207
+ "eval_loss": 0.3910418748855591,
208
+ "eval_runtime": 386.6143,
209
+ "eval_samples_per_second": 20.041,
210
+ "eval_steps_per_second": 0.316,
211
+ "eval_wer": 0.45223796828960294,
212
+ "step": 2400
213
+ },
214
+ {
215
+ "epoch": 14.37,
216
+ "learning_rate": 7.5e-05,
217
+ "loss": 1.3196,
218
+ "step": 2500
219
+ },
220
+ {
221
+ "epoch": 14.94,
222
+ "learning_rate": 7.5e-05,
223
+ "loss": 1.3085,
224
+ "step": 2600
225
+ },
226
+ {
227
+ "epoch": 15.52,
228
+ "learning_rate": 7.5e-05,
229
+ "loss": 1.3006,
230
+ "step": 2700
231
+ },
232
+ {
233
+ "epoch": 16.09,
234
+ "learning_rate": 7.5e-05,
235
+ "loss": 1.2743,
236
+ "step": 2800
237
+ },
238
+ {
239
+ "epoch": 16.09,
240
+ "eval_cer": 0.10436362622251684,
241
+ "eval_loss": 0.3533952534198761,
242
+ "eval_runtime": 386.8689,
243
+ "eval_samples_per_second": 20.027,
244
+ "eval_steps_per_second": 0.315,
245
+ "eval_wer": 0.41365230812403536,
246
+ "step": 2800
247
+ },
248
+ {
249
+ "epoch": 16.67,
250
+ "learning_rate": 7.5e-05,
251
+ "loss": 1.256,
252
+ "step": 2900
253
+ },
254
+ {
255
+ "epoch": 17.24,
256
+ "learning_rate": 7.5e-05,
257
+ "loss": 1.2627,
258
+ "step": 3000
259
+ },
260
+ {
261
+ "epoch": 17.82,
262
+ "learning_rate": 7.5e-05,
263
+ "loss": 1.2496,
264
+ "step": 3100
265
+ },
266
+ {
267
+ "epoch": 18.39,
268
+ "learning_rate": 7.5e-05,
269
+ "loss": 1.2396,
270
+ "step": 3200
271
+ },
272
+ {
273
+ "epoch": 18.39,
274
+ "eval_cer": 0.09588994077001131,
275
+ "eval_loss": 0.3277980387210846,
276
+ "eval_runtime": 388.8491,
277
+ "eval_samples_per_second": 19.925,
278
+ "eval_steps_per_second": 0.314,
279
+ "eval_wer": 0.38773677564192505,
280
+ "step": 3200
281
+ },
282
+ {
283
+ "epoch": 18.97,
284
+ "learning_rate": 7.5e-05,
285
+ "loss": 1.2291,
286
+ "step": 3300
287
+ },
288
+ {
289
+ "epoch": 19.54,
290
+ "learning_rate": 7.5e-05,
291
+ "loss": 1.2254,
292
+ "step": 3400
293
+ },
294
+ {
295
+ "epoch": 20.11,
296
+ "learning_rate": 7.5e-05,
297
+ "loss": 1.2063,
298
+ "step": 3500
299
+ },
300
+ {
301
+ "epoch": 20.69,
302
+ "learning_rate": 7.5e-05,
303
+ "loss": 1.2035,
304
+ "step": 3600
305
+ },
306
+ {
307
+ "epoch": 20.69,
308
+ "eval_cer": 0.09172603609575479,
309
+ "eval_loss": 0.31091800332069397,
310
+ "eval_runtime": 384.3384,
311
+ "eval_samples_per_second": 20.159,
312
+ "eval_steps_per_second": 0.317,
313
+ "eval_wer": 0.3740844675178897,
314
+ "step": 3600
315
+ },
316
+ {
317
+ "epoch": 21.26,
318
+ "learning_rate": 7.5e-05,
319
+ "loss": 1.1937,
320
+ "step": 3700
321
+ },
322
+ {
323
+ "epoch": 21.84,
324
+ "learning_rate": 7.5e-05,
325
+ "loss": 1.1787,
326
+ "step": 3800
327
+ },
328
+ {
329
+ "epoch": 22.41,
330
+ "learning_rate": 7.5e-05,
331
+ "loss": 1.1779,
332
+ "step": 3900
333
+ },
334
+ {
335
+ "epoch": 22.99,
336
+ "learning_rate": 7.5e-05,
337
+ "loss": 1.1745,
338
+ "step": 4000
339
+ },
340
+ {
341
+ "epoch": 22.99,
342
+ "eval_cer": 0.08818425010028982,
343
+ "eval_loss": 0.2971595227718353,
344
+ "eval_runtime": 385.6559,
345
+ "eval_samples_per_second": 20.09,
346
+ "eval_steps_per_second": 0.316,
347
+ "eval_wer": 0.3618212431598148,
348
+ "step": 4000
349
+ },
350
+ {
351
+ "epoch": 23.56,
352
+ "learning_rate": 7.5e-05,
353
+ "loss": 1.1697,
354
+ "step": 4100
355
+ },
356
+ {
357
+ "epoch": 24.14,
358
+ "learning_rate": 7.5e-05,
359
+ "loss": 1.1506,
360
+ "step": 4200
361
+ },
362
+ {
363
+ "epoch": 24.71,
364
+ "learning_rate": 7.5e-05,
365
+ "loss": 1.1512,
366
+ "step": 4300
367
+ },
368
+ {
369
+ "epoch": 25.29,
370
+ "learning_rate": 7.414768806073154e-05,
371
+ "loss": 1.1541,
372
+ "step": 4400
373
+ },
374
+ {
375
+ "epoch": 25.29,
376
+ "eval_cer": 0.08323089827501506,
377
+ "eval_loss": 0.28362834453582764,
378
+ "eval_runtime": 385.7883,
379
+ "eval_samples_per_second": 20.084,
380
+ "eval_steps_per_second": 0.316,
381
+ "eval_wer": 0.3427108180159955,
382
+ "step": 4400
383
+ },
384
+ {
385
+ "epoch": 25.86,
386
+ "learning_rate": 7.250862663906142e-05,
387
+ "loss": 1.1464,
388
+ "step": 4500
389
+ },
390
+ {
391
+ "epoch": 26.44,
392
+ "learning_rate": 7.08695652173913e-05,
393
+ "loss": 1.1395,
394
+ "step": 4600
395
+ },
396
+ {
397
+ "epoch": 27.01,
398
+ "learning_rate": 6.923050379572118e-05,
399
+ "loss": 1.1432,
400
+ "step": 4700
401
+ },
402
+ {
403
+ "epoch": 27.59,
404
+ "learning_rate": 6.759144237405107e-05,
405
+ "loss": 1.1372,
406
+ "step": 4800
407
+ },
408
+ {
409
+ "epoch": 27.59,
410
+ "eval_cer": 0.08124869945017581,
411
+ "eval_loss": 0.2759494483470917,
412
+ "eval_runtime": 383.6861,
413
+ "eval_samples_per_second": 20.194,
414
+ "eval_steps_per_second": 0.318,
415
+ "eval_wer": 0.3357373368878911,
416
+ "step": 4800
417
+ },
418
+ {
419
+ "epoch": 28.16,
420
+ "learning_rate": 6.595238095238095e-05,
421
+ "loss": 1.1179,
422
+ "step": 4900
423
+ },
424
+ {
425
+ "epoch": 28.74,
426
+ "learning_rate": 6.431331953071083e-05,
427
+ "loss": 1.1221,
428
+ "step": 5000
429
+ },
430
+ {
431
+ "epoch": 29.31,
432
+ "learning_rate": 6.267425810904071e-05,
433
+ "loss": 1.116,
434
+ "step": 5100
435
+ },
436
+ {
437
+ "epoch": 29.89,
438
+ "learning_rate": 6.103519668737059e-05,
439
+ "loss": 1.1048,
440
+ "step": 5200
441
+ },
442
+ {
443
+ "epoch": 29.89,
444
+ "eval_cer": 0.07828827263385742,
445
+ "eval_loss": 0.2669001519680023,
446
+ "eval_runtime": 386.0726,
447
+ "eval_samples_per_second": 20.069,
448
+ "eval_steps_per_second": 0.316,
449
+ "eval_wer": 0.32842710818015997,
450
+ "step": 5200
451
+ },
452
+ {
453
+ "epoch": 30.46,
454
+ "learning_rate": 5.939613526570048e-05,
455
+ "loss": 1.1005,
456
+ "step": 5300
457
+ },
458
+ {
459
+ "epoch": 31.03,
460
+ "learning_rate": 5.775707384403037e-05,
461
+ "loss": 1.1056,
462
+ "step": 5400
463
+ },
464
+ {
465
+ "epoch": 31.61,
466
+ "learning_rate": 5.6118012422360246e-05,
467
+ "loss": 1.093,
468
+ "step": 5500
469
+ },
470
+ {
471
+ "epoch": 32.18,
472
+ "learning_rate": 5.4478951000690126e-05,
473
+ "loss": 1.0966,
474
+ "step": 5600
475
+ },
476
+ {
477
+ "epoch": 32.18,
478
+ "eval_cer": 0.07754173021930756,
479
+ "eval_loss": 0.2677817642688751,
480
+ "eval_runtime": 386.0716,
481
+ "eval_samples_per_second": 20.069,
482
+ "eval_steps_per_second": 0.316,
483
+ "eval_wer": 0.32491932089238107,
484
+ "step": 5600
485
+ },
486
+ {
487
+ "epoch": 32.76,
488
+ "learning_rate": 5.283988957902001e-05,
489
+ "loss": 1.0884,
490
+ "step": 5700
491
+ },
492
+ {
493
+ "epoch": 33.33,
494
+ "learning_rate": 5.120082815734989e-05,
495
+ "loss": 1.0878,
496
+ "step": 5800
497
+ },
498
+ {
499
+ "epoch": 33.91,
500
+ "learning_rate": 4.956176673567977e-05,
501
+ "loss": 1.0803,
502
+ "step": 5900
503
+ },
504
+ {
505
+ "epoch": 34.48,
506
+ "learning_rate": 4.792270531400966e-05,
507
+ "loss": 1.0747,
508
+ "step": 6000
509
+ },
510
+ {
511
+ "epoch": 34.48,
512
+ "eval_cer": 0.07477866519074373,
513
+ "eval_loss": 0.25474071502685547,
514
+ "eval_runtime": 385.0232,
515
+ "eval_samples_per_second": 20.123,
516
+ "eval_steps_per_second": 0.317,
517
+ "eval_wer": 0.31337168514101305,
518
+ "step": 6000
519
+ },
520
+ {
521
+ "epoch": 35.06,
522
+ "learning_rate": 4.628364389233954e-05,
523
+ "loss": 1.0704,
524
+ "step": 6100
525
+ },
526
+ {
527
+ "epoch": 35.63,
528
+ "learning_rate": 4.464458247066942e-05,
529
+ "loss": 1.0707,
530
+ "step": 6200
531
+ },
532
+ {
533
+ "epoch": 36.21,
534
+ "learning_rate": 4.30055210489993e-05,
535
+ "loss": 1.0599,
536
+ "step": 6300
537
+ },
538
+ {
539
+ "epoch": 36.78,
540
+ "learning_rate": 4.136645962732919e-05,
541
+ "loss": 1.0593,
542
+ "step": 6400
543
+ },
544
+ {
545
+ "epoch": 36.78,
546
+ "eval_cer": 0.07282435444460891,
547
+ "eval_loss": 0.2490725815296173,
548
+ "eval_runtime": 384.9593,
549
+ "eval_samples_per_second": 20.127,
550
+ "eval_steps_per_second": 0.317,
551
+ "eval_wer": 0.3077311631822646,
552
+ "step": 6400
553
+ },
554
+ {
555
+ "epoch": 37.36,
556
+ "learning_rate": 3.972739820565908e-05,
557
+ "loss": 1.0572,
558
+ "step": 6500
559
+ },
560
+ {
561
+ "epoch": 37.93,
562
+ "learning_rate": 3.808833678398896e-05,
563
+ "loss": 1.0506,
564
+ "step": 6600
565
+ },
566
+ {
567
+ "epoch": 38.51,
568
+ "learning_rate": 3.644927536231883e-05,
569
+ "loss": 1.0478,
570
+ "step": 6700
571
+ },
572
+ {
573
+ "epoch": 39.08,
574
+ "learning_rate": 3.4810213940648726e-05,
575
+ "loss": 1.0417,
576
+ "step": 6800
577
+ },
578
+ {
579
+ "epoch": 39.08,
580
+ "eval_cer": 0.07105667930211156,
581
+ "eval_loss": 0.24495387077331543,
582
+ "eval_runtime": 382.2473,
583
+ "eval_samples_per_second": 20.27,
584
+ "eval_steps_per_second": 0.319,
585
+ "eval_wer": 0.301220709976147,
586
+ "step": 6800
587
+ },
588
+ {
589
+ "epoch": 39.66,
590
+ "learning_rate": 3.3171152518978605e-05,
591
+ "loss": 1.0359,
592
+ "step": 6900
593
+ },
594
+ {
595
+ "epoch": 40.23,
596
+ "learning_rate": 3.153209109730849e-05,
597
+ "loss": 1.0389,
598
+ "step": 7000
599
+ },
600
+ {
601
+ "epoch": 40.8,
602
+ "learning_rate": 2.989302967563837e-05,
603
+ "loss": 1.0365,
604
+ "step": 7100
605
+ },
606
+ {
607
+ "epoch": 41.38,
608
+ "learning_rate": 2.8253968253968247e-05,
609
+ "loss": 1.024,
610
+ "step": 7200
611
+ },
612
+ {
613
+ "epoch": 41.38,
614
+ "eval_cer": 0.06938768505349148,
615
+ "eval_loss": 0.24019765853881836,
616
+ "eval_runtime": 381.671,
617
+ "eval_samples_per_second": 20.3,
618
+ "eval_steps_per_second": 0.32,
619
+ "eval_wer": 0.29560825031570087,
620
+ "step": 7200
621
+ },
622
+ {
623
+ "epoch": 41.95,
624
+ "learning_rate": 2.6614906832298136e-05,
625
+ "loss": 1.0288,
626
+ "step": 7300
627
+ },
628
+ {
629
+ "epoch": 42.53,
630
+ "learning_rate": 2.4975845410628016e-05,
631
+ "loss": 1.0283,
632
+ "step": 7400
633
+ },
634
+ {
635
+ "epoch": 43.1,
636
+ "learning_rate": 2.3353174603174605e-05,
637
+ "loss": 1.0153,
638
+ "step": 7500
639
+ },
640
+ {
641
+ "epoch": 43.68,
642
+ "learning_rate": 2.1714113181504485e-05,
643
+ "loss": 1.0106,
644
+ "step": 7600
645
+ },
646
+ {
647
+ "epoch": 43.68,
648
+ "eval_cer": 0.06811555961720395,
649
+ "eval_loss": 0.23507660627365112,
650
+ "eval_runtime": 383.1253,
651
+ "eval_samples_per_second": 20.223,
652
+ "eval_steps_per_second": 0.318,
653
+ "eval_wer": 0.29151115476357514,
654
+ "step": 7600
655
+ },
656
+ {
657
+ "epoch": 44.25,
658
+ "learning_rate": 2.007505175983437e-05,
659
+ "loss": 1.013,
660
+ "step": 7700
661
+ },
662
+ {
663
+ "epoch": 44.83,
664
+ "learning_rate": 1.843599033816425e-05,
665
+ "loss": 1.0067,
666
+ "step": 7800
667
+ },
668
+ {
669
+ "epoch": 45.4,
670
+ "learning_rate": 1.6796928916494137e-05,
671
+ "loss": 1.0076,
672
+ "step": 7900
673
+ },
674
+ {
675
+ "epoch": 45.98,
676
+ "learning_rate": 1.5157867494824016e-05,
677
+ "loss": 1.0014,
678
+ "step": 8000
679
+ },
680
+ {
681
+ "epoch": 45.98,
682
+ "eval_cer": 0.06731109580842178,
683
+ "eval_loss": 0.23282095789909363,
684
+ "eval_runtime": 382.9081,
685
+ "eval_samples_per_second": 20.235,
686
+ "eval_steps_per_second": 0.319,
687
+ "eval_wer": 0.2896309807773257,
688
+ "step": 8000
689
+ },
690
+ {
691
+ "epoch": 46.55,
692
+ "learning_rate": 1.3518806073153895e-05,
693
+ "loss": 0.9955,
694
+ "step": 8100
695
+ },
696
+ {
697
+ "epoch": 47.13,
698
+ "learning_rate": 1.1879744651483783e-05,
699
+ "loss": 0.9998,
700
+ "step": 8200
701
+ },
702
+ {
703
+ "epoch": 47.7,
704
+ "learning_rate": 1.0240683229813662e-05,
705
+ "loss": 0.9956,
706
+ "step": 8300
707
+ },
708
+ {
709
+ "epoch": 48.28,
710
+ "learning_rate": 8.60162180814355e-06,
711
+ "loss": 0.9999,
712
+ "step": 8400
713
+ },
714
+ {
715
+ "epoch": 48.28,
716
+ "eval_cer": 0.06668254141915997,
717
+ "eval_loss": 0.2318294197320938,
718
+ "eval_runtime": 383.814,
719
+ "eval_samples_per_second": 20.187,
720
+ "eval_steps_per_second": 0.318,
721
+ "eval_wer": 0.2865862214115336,
722
+ "step": 8400
723
+ },
724
+ {
725
+ "epoch": 48.85,
726
+ "learning_rate": 6.962560386473429e-06,
727
+ "loss": 0.9898,
728
+ "step": 8500
729
+ },
730
+ {
731
+ "epoch": 49.43,
732
+ "learning_rate": 5.323498964803316e-06,
733
+ "loss": 0.997,
734
+ "step": 8600
735
+ },
736
+ {
737
+ "epoch": 49.95,
738
+ "step": 8692,
739
+ "total_flos": 2.038617513286856e+20,
740
+ "train_loss": 1.5808194972818148,
741
+ "train_runtime": 72373.5093,
742
+ "train_samples_per_second": 15.373,
743
+ "train_steps_per_second": 0.12
744
+ }
745
+ ],
746
+ "max_steps": 8692,
747
+ "num_train_epochs": 50,
748
+ "total_flos": 2.038617513286856e+20,
749
+ "trial_name": null,
750
+ "trial_params": null
751
+ }