lucio commited on
Commit
206bed3
1 Parent(s): 1118cdf

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_loss": 0.226610004901886,
4
- "eval_runtime": 133.034,
5
  "eval_samples": 2620,
6
- "eval_samples_per_second": 19.694,
7
- "eval_steps_per_second": 2.466,
8
- "eval_wer": 0.3654982085794519,
9
- "train_loss": 1.6428977303426773,
10
- "train_runtime": 23684.6778,
11
  "train_samples": 5855,
12
- "train_samples_per_second": 12.36,
13
- "train_steps_per_second": 0.386
14
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.17718610167503357,
4
+ "eval_runtime": 127.8129,
5
  "eval_samples": 2620,
6
+ "eval_samples_per_second": 20.499,
7
+ "eval_steps_per_second": 2.566,
8
+ "eval_wer": 0.2588896424765042,
9
+ "train_loss": 1.1684310275218526,
10
+ "train_runtime": 46836.9916,
11
  "train_samples": 5855,
12
+ "train_samples_per_second": 12.501,
13
+ "train_steps_per_second": 0.391
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_loss": 0.226610004901886,
4
- "eval_runtime": 133.034,
5
  "eval_samples": 2620,
6
- "eval_samples_per_second": 19.694,
7
- "eval_steps_per_second": 2.466,
8
- "eval_wer": 0.3654982085794519
9
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.17718610167503357,
4
+ "eval_runtime": 127.8129,
5
  "eval_samples": 2620,
6
+ "eval_samples_per_second": 20.499,
7
+ "eval_steps_per_second": 2.566,
8
+ "eval_wer": 0.2588896424765042
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:64821123b95ae3fcb0a541bf3eea6771834721786c5676b90cc4ba9f982868b5
3
  size 1262079473
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8912bd712e88f88026bdceaa799f14883397e3d5270c9e6f39d9213587026e3b
3
  size 1262079473
runs/Jan30_06-27-48_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643524110.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.1718926.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53d36233f4d55b2f5de75f67185f3cdf586344de20438e21ad61591b97f41ed3
3
- size 44513
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaaa9e6330542eb98979636560d8178dfb765852dd99f48a97f526a06fa0cb98
3
+ size 45353
runs/Jan30_06-27-48_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643571078.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.1718926.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea029fb671b46172b33eeb75c2f39fea8c67239bd5f2139e6a63adad72f08478
3
+ size 364
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "train_loss": 1.6428977303426773,
4
- "train_runtime": 23684.6778,
5
  "train_samples": 5855,
6
- "train_samples_per_second": 12.36,
7
- "train_steps_per_second": 0.386
8
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "train_loss": 1.1684310275218526,
4
+ "train_runtime": 46836.9916,
5
  "train_samples": 5855,
6
+ "train_samples_per_second": 12.501,
7
+ "train_steps_per_second": 0.391
8
  }
trainer_state.json CHANGED
@@ -1,733 +1,1447 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 50.0,
5
- "global_step": 9150,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.55,
12
- "learning_rate": 3.7125e-06,
13
- "loss": 14.9109,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.09,
18
- "learning_rate": 7.4625e-06,
19
- "loss": 7.5771,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 1.64,
24
- "learning_rate": 1.1212499999999998e-05,
25
- "loss": 4.939,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 2.19,
30
- "learning_rate": 1.49625e-05,
31
- "loss": 4.1873,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 2.73,
36
- "learning_rate": 1.8712499999999997e-05,
37
- "loss": 3.6863,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 2.73,
42
- "eval_loss": 3.5362327098846436,
43
- "eval_runtime": 132.4771,
44
- "eval_samples_per_second": 19.777,
45
- "eval_steps_per_second": 2.476,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 3.28,
51
- "learning_rate": 2.2462499999999997e-05,
52
- "loss": 3.3718,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 3.83,
57
- "learning_rate": 2.6212499999999997e-05,
58
- "loss": 3.2608,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 4.37,
63
- "learning_rate": 2.99625e-05,
64
- "loss": 3.2251,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 4.92,
69
- "learning_rate": 3.37125e-05,
70
- "loss": 3.1741,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 5.46,
75
- "learning_rate": 3.7462499999999996e-05,
76
- "loss": 3.1409,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 5.46,
81
- "eval_loss": 3.132805347442627,
82
- "eval_runtime": 132.2993,
83
- "eval_samples_per_second": 19.804,
84
- "eval_steps_per_second": 2.479,
85
  "eval_wer": 1.0,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 6.01,
90
- "learning_rate": 4.12125e-05,
91
- "loss": 3.1047,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 6.56,
96
- "learning_rate": 4.4962499999999995e-05,
97
- "loss": 2.8994,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 7.1,
102
- "learning_rate": 4.871249999999999e-05,
103
- "loss": 2.5085,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 7.65,
108
- "learning_rate": 5.2462499999999994e-05,
109
- "loss": 2.1499,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 8.2,
114
- "learning_rate": 5.6175e-05,
115
- "loss": 1.8979,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 8.2,
120
- "eval_loss": 0.9715010523796082,
121
- "eval_runtime": 131.829,
122
- "eval_samples_per_second": 19.874,
123
- "eval_steps_per_second": 2.488,
124
- "eval_wer": 0.8863658371259805,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 8.74,
129
- "learning_rate": 5.9925e-05,
130
- "loss": 1.7702,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 9.29,
135
- "learning_rate": 6.367499999999999e-05,
136
- "loss": 1.6541,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 9.84,
141
- "learning_rate": 6.738749999999999e-05,
142
- "loss": 1.6072,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 10.38,
147
- "learning_rate": 7.11375e-05,
148
- "loss": 1.5188,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 10.93,
153
- "learning_rate": 7.48875e-05,
154
- "loss": 1.4859,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 10.93,
159
- "eval_loss": 0.5233765244483948,
160
- "eval_runtime": 130.5844,
161
- "eval_samples_per_second": 20.064,
162
- "eval_steps_per_second": 2.512,
163
- "eval_wer": 0.7062554468868016,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 11.48,
168
- "learning_rate": 7.398251748251747e-05,
169
- "loss": 1.4386,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 12.02,
174
- "learning_rate": 7.293356643356642e-05,
175
- "loss": 1.4333,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 12.57,
180
- "learning_rate": 7.188461538461538e-05,
181
- "loss": 1.3923,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 13.11,
186
- "learning_rate": 7.083566433566433e-05,
187
- "loss": 1.3623,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 13.66,
192
- "learning_rate": 6.978671328671328e-05,
193
- "loss": 1.3388,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 13.66,
198
- "eval_loss": 0.4093627333641052,
199
- "eval_runtime": 131.4899,
200
- "eval_samples_per_second": 19.925,
201
- "eval_steps_per_second": 2.494,
202
- "eval_wer": 0.6203156773506343,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 14.21,
207
- "learning_rate": 6.873776223776224e-05,
208
- "loss": 1.3338,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 14.75,
213
- "learning_rate": 6.768881118881118e-05,
214
- "loss": 1.3039,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 15.3,
219
- "learning_rate": 6.663986013986013e-05,
220
- "loss": 1.2935,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 15.85,
225
- "learning_rate": 6.559090909090908e-05,
226
- "loss": 1.2818,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 16.39,
231
- "learning_rate": 6.454195804195804e-05,
232
- "loss": 1.2531,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 16.39,
237
- "eval_loss": 0.3596232533454895,
238
- "eval_runtime": 131.7209,
239
- "eval_samples_per_second": 19.891,
240
- "eval_steps_per_second": 2.49,
241
- "eval_wer": 0.5184952067396146,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 16.94,
246
- "learning_rate": 6.349300699300699e-05,
247
- "loss": 1.2531,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 17.49,
252
- "learning_rate": 6.244405594405593e-05,
253
- "loss": 1.2282,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 18.03,
258
- "learning_rate": 6.139510489510488e-05,
259
- "loss": 1.2088,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 18.58,
264
- "learning_rate": 6.034615384615384e-05,
265
- "loss": 1.2041,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 19.13,
270
- "learning_rate": 5.9297202797202796e-05,
271
- "loss": 1.1992,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 19.13,
276
- "eval_loss": 0.32211676239967346,
277
- "eval_runtime": 130.9541,
278
- "eval_samples_per_second": 20.007,
279
- "eval_steps_per_second": 2.505,
280
- "eval_wer": 0.4854265517575288,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 19.67,
285
- "learning_rate": 5.8248251748251737e-05,
286
- "loss": 1.1776,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 20.22,
291
- "learning_rate": 5.719930069930069e-05,
292
- "loss": 1.1815,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 20.77,
297
- "learning_rate": 5.6160839160839156e-05,
298
- "loss": 1.1624,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 21.31,
303
- "learning_rate": 5.511188811188811e-05,
304
- "loss": 1.1576,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 21.86,
309
- "learning_rate": 5.4062937062937064e-05,
310
- "loss": 1.1589,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 21.86,
315
- "eval_loss": 0.30402758717536926,
316
- "eval_runtime": 130.3078,
317
- "eval_samples_per_second": 20.106,
318
- "eval_steps_per_second": 2.517,
319
- "eval_wer": 0.4610244988864143,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 22.4,
324
- "learning_rate": 5.3013986013986004e-05,
325
- "loss": 1.1705,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 22.95,
330
- "learning_rate": 5.196503496503496e-05,
331
- "loss": 1.1486,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 23.5,
336
- "learning_rate": 5.091608391608391e-05,
337
- "loss": 1.1266,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 24.04,
342
- "learning_rate": 4.9867132867132866e-05,
343
- "loss": 1.1348,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 24.59,
348
- "learning_rate": 4.8818181818181813e-05,
349
- "loss": 1.1345,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 24.59,
354
- "eval_loss": 0.2906973361968994,
355
- "eval_runtime": 132.6128,
356
- "eval_samples_per_second": 19.757,
357
- "eval_steps_per_second": 2.473,
358
- "eval_wer": 0.4449501307252832,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 25.14,
363
- "learning_rate": 4.776923076923077e-05,
364
- "loss": 1.108,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 25.68,
369
- "learning_rate": 4.672027972027972e-05,
370
- "loss": 1.1114,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 26.23,
375
- "learning_rate": 4.567132867132866e-05,
376
- "loss": 1.1071,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 26.78,
381
- "learning_rate": 4.4622377622377616e-05,
382
- "loss": 1.1009,
383
  "step": 4900
384
  },
385
  {
386
  "epoch": 27.32,
387
- "learning_rate": 4.357342657342657e-05,
388
- "loss": 1.086,
389
  "step": 5000
390
  },
391
  {
392
  "epoch": 27.32,
393
- "eval_loss": 0.27438458800315857,
394
- "eval_runtime": 131.0041,
395
- "eval_samples_per_second": 19.999,
396
- "eval_steps_per_second": 2.504,
397
- "eval_wer": 0.4298925147671153,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 27.87,
402
- "learning_rate": 4.2524475524475524e-05,
403
- "loss": 1.0843,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 28.42,
408
- "learning_rate": 4.147552447552447e-05,
409
- "loss": 1.0761,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 28.96,
414
- "learning_rate": 4.0426573426573425e-05,
415
- "loss": 1.0779,
416
  "step": 5300
417
  },
418
  {
419
  "epoch": 29.51,
420
- "learning_rate": 3.937762237762237e-05,
421
- "loss": 1.074,
422
  "step": 5400
423
  },
424
  {
425
  "epoch": 30.05,
426
- "learning_rate": 3.832867132867133e-05,
427
- "loss": 1.0697,
428
  "step": 5500
429
  },
430
  {
431
  "epoch": 30.05,
432
- "eval_loss": 0.2617148756980896,
433
- "eval_runtime": 131.2229,
434
- "eval_samples_per_second": 19.966,
435
- "eval_steps_per_second": 2.5,
436
- "eval_wer": 0.4147864820373777,
437
  "step": 5500
438
  },
439
  {
440
  "epoch": 30.6,
441
- "learning_rate": 3.727972027972028e-05,
442
- "loss": 1.0599,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 31.15,
447
- "learning_rate": 3.623076923076923e-05,
448
- "loss": 1.0666,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 31.69,
453
- "learning_rate": 3.518181818181818e-05,
454
- "loss": 1.0574,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 32.24,
459
- "learning_rate": 3.413286713286713e-05,
460
- "loss": 1.0534,
461
  "step": 5900
462
  },
463
  {
464
  "epoch": 32.79,
465
- "learning_rate": 3.308391608391608e-05,
466
- "loss": 1.0518,
467
  "step": 6000
468
  },
469
  {
470
  "epoch": 32.79,
471
- "eval_loss": 0.25627899169921875,
472
- "eval_runtime": 130.5236,
473
- "eval_samples_per_second": 20.073,
474
- "eval_steps_per_second": 2.513,
475
- "eval_wer": 0.4033117071753655,
476
  "step": 6000
477
  },
478
  {
479
  "epoch": 33.33,
480
- "learning_rate": 3.204545454545454e-05,
481
- "loss": 1.0304,
482
  "step": 6100
483
  },
484
  {
485
  "epoch": 33.88,
486
- "learning_rate": 3.0996503496503495e-05,
487
- "loss": 1.0336,
488
  "step": 6200
489
  },
490
  {
491
  "epoch": 34.43,
492
- "learning_rate": 2.9947552447552443e-05,
493
- "loss": 1.0289,
494
  "step": 6300
495
  },
496
  {
497
  "epoch": 34.97,
498
- "learning_rate": 2.8898601398601397e-05,
499
- "loss": 1.027,
500
  "step": 6400
501
  },
502
  {
503
  "epoch": 35.52,
504
- "learning_rate": 2.7849650349650347e-05,
505
- "loss": 1.0101,
506
  "step": 6500
507
  },
508
  {
509
  "epoch": 35.52,
510
- "eval_loss": 0.24797162413597107,
511
- "eval_runtime": 130.8457,
512
- "eval_samples_per_second": 20.024,
513
- "eval_steps_per_second": 2.507,
514
- "eval_wer": 0.3934346857751525,
515
  "step": 6500
516
  },
517
  {
518
  "epoch": 36.07,
519
- "learning_rate": 2.6800699300699298e-05,
520
- "loss": 1.0278,
521
  "step": 6600
522
  },
523
  {
524
  "epoch": 36.61,
525
- "learning_rate": 2.575174825174825e-05,
526
- "loss": 0.9947,
527
  "step": 6700
528
  },
529
  {
530
  "epoch": 37.16,
531
- "learning_rate": 2.4702797202797203e-05,
532
- "loss": 1.0011,
533
  "step": 6800
534
  },
535
  {
536
  "epoch": 37.7,
537
- "learning_rate": 2.365384615384615e-05,
538
- "loss": 1.0041,
539
  "step": 6900
540
  },
541
  {
542
  "epoch": 38.25,
543
- "learning_rate": 2.2604895104895104e-05,
544
- "loss": 1.0013,
545
  "step": 7000
546
  },
547
  {
548
  "epoch": 38.25,
549
- "eval_loss": 0.2412029653787613,
550
- "eval_runtime": 130.9316,
551
- "eval_samples_per_second": 20.01,
552
- "eval_steps_per_second": 2.505,
553
- "eval_wer": 0.38549433523772636,
554
  "step": 7000
555
  },
556
  {
557
  "epoch": 38.8,
558
- "learning_rate": 2.1555944055944055e-05,
559
- "loss": 0.9881,
560
  "step": 7100
561
  },
562
  {
563
  "epoch": 39.34,
564
- "learning_rate": 2.0506993006993005e-05,
565
- "loss": 0.9897,
566
  "step": 7200
567
  },
568
  {
569
  "epoch": 39.89,
570
- "learning_rate": 1.9458041958041956e-05,
571
- "loss": 0.9853,
572
  "step": 7300
573
  },
574
  {
575
  "epoch": 40.44,
576
- "learning_rate": 1.8409090909090907e-05,
577
- "loss": 0.9873,
578
  "step": 7400
579
  },
580
  {
581
  "epoch": 40.98,
582
- "learning_rate": 1.7360139860139857e-05,
583
- "loss": 0.9845,
584
  "step": 7500
585
  },
586
  {
587
  "epoch": 40.98,
588
- "eval_loss": 0.23970751464366913,
589
- "eval_runtime": 131.9142,
590
- "eval_samples_per_second": 19.861,
591
- "eval_steps_per_second": 2.486,
592
- "eval_wer": 0.37711823375617315,
593
  "step": 7500
594
  },
595
  {
596
  "epoch": 41.53,
597
- "learning_rate": 1.631118881118881e-05,
598
- "loss": 0.9802,
599
  "step": 7600
600
  },
601
  {
602
  "epoch": 42.08,
603
- "learning_rate": 1.5262237762237762e-05,
604
- "loss": 0.9856,
605
  "step": 7700
606
  },
607
  {
608
  "epoch": 42.62,
609
- "learning_rate": 1.4213286713286713e-05,
610
- "loss": 0.9738,
611
  "step": 7800
612
  },
613
  {
614
  "epoch": 43.17,
615
- "learning_rate": 1.3164335664335663e-05,
616
- "loss": 0.9703,
617
  "step": 7900
618
  },
619
  {
620
  "epoch": 43.72,
621
- "learning_rate": 1.2115384615384615e-05,
622
- "loss": 0.9739,
623
  "step": 8000
624
  },
625
  {
626
  "epoch": 43.72,
627
- "eval_loss": 0.2302592694759369,
628
- "eval_runtime": 132.2355,
629
- "eval_samples_per_second": 19.813,
630
- "eval_steps_per_second": 2.48,
631
- "eval_wer": 0.3726154740001937,
632
  "step": 8000
633
  },
634
  {
635
  "epoch": 44.26,
636
- "learning_rate": 1.1066433566433566e-05,
637
- "loss": 0.9687,
638
  "step": 8100
639
  },
640
  {
641
  "epoch": 44.81,
642
- "learning_rate": 1.0038461538461537e-05,
643
- "loss": 0.9697,
644
  "step": 8200
645
  },
646
  {
647
  "epoch": 45.36,
648
- "learning_rate": 8.98951048951049e-06,
649
- "loss": 0.9671,
650
  "step": 8300
651
  },
652
  {
653
  "epoch": 45.9,
654
- "learning_rate": 7.94055944055944e-06,
655
- "loss": 0.9578,
656
  "step": 8400
657
  },
658
  {
659
  "epoch": 46.45,
660
- "learning_rate": 6.891608391608391e-06,
661
- "loss": 0.9636,
662
  "step": 8500
663
  },
664
  {
665
  "epoch": 46.45,
666
- "eval_loss": 0.22845281660556793,
667
- "eval_runtime": 132.3979,
668
- "eval_samples_per_second": 19.789,
669
- "eval_steps_per_second": 2.477,
670
- "eval_wer": 0.36869371550305025,
671
  "step": 8500
672
  },
673
  {
674
  "epoch": 46.99,
675
- "learning_rate": 5.842657342657342e-06,
676
- "loss": 0.955,
677
  "step": 8600
678
  },
679
  {
680
  "epoch": 47.54,
681
- "learning_rate": 4.7937062937062936e-06,
682
- "loss": 0.9495,
683
  "step": 8700
684
  },
685
  {
686
  "epoch": 48.09,
687
- "learning_rate": 3.7447552447552446e-06,
688
- "loss": 0.9612,
689
  "step": 8800
690
  },
691
  {
692
  "epoch": 48.63,
693
- "learning_rate": 2.6958041958041957e-06,
694
- "loss": 0.9415,
695
  "step": 8900
696
  },
697
  {
698
  "epoch": 49.18,
699
- "learning_rate": 1.6468531468531468e-06,
700
- "loss": 0.9466,
701
  "step": 9000
702
  },
703
  {
704
  "epoch": 49.18,
705
- "eval_loss": 0.2261359691619873,
706
- "eval_runtime": 131.8096,
707
- "eval_samples_per_second": 19.877,
708
- "eval_steps_per_second": 2.488,
709
- "eval_wer": 0.36477195700590687,
710
  "step": 9000
711
  },
712
  {
713
  "epoch": 49.73,
714
- "learning_rate": 5.979020979020978e-07,
715
- "loss": 0.9533,
716
  "step": 9100
717
  },
718
  {
719
- "epoch": 50.0,
720
- "step": 9150,
721
- "total_flos": 5.14420411350113e+19,
722
- "train_loss": 1.6428977303426773,
723
- "train_runtime": 23684.6778,
724
- "train_samples_per_second": 12.36,
725
- "train_steps_per_second": 0.386
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726
  }
727
  ],
728
- "max_steps": 9150,
729
- "num_train_epochs": 50,
730
- "total_flos": 5.14420411350113e+19,
731
  "trial_name": null,
732
  "trial_params": null
733
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 100.0,
5
+ "global_step": 18300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.55,
12
+ "learning_rate": 4.950000000000001e-06,
13
+ "loss": 13.5246,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.09,
18
+ "learning_rate": 9.950000000000001e-06,
19
+ "loss": 6.1441,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 1.64,
24
+ "learning_rate": 1.4950000000000001e-05,
25
+ "loss": 4.2189,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 2.19,
30
+ "learning_rate": 1.995e-05,
31
+ "loss": 3.6075,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 2.73,
36
+ "learning_rate": 2.495e-05,
37
+ "loss": 3.3043,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 2.73,
42
+ "eval_loss": 3.241492748260498,
43
+ "eval_runtime": 169.6485,
44
+ "eval_samples_per_second": 15.444,
45
+ "eval_steps_per_second": 1.933,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 3.28,
51
+ "learning_rate": 2.995e-05,
52
+ "loss": 3.2087,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 3.83,
57
+ "learning_rate": 3.495e-05,
58
+ "loss": 3.1835,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 4.37,
63
+ "learning_rate": 3.995e-05,
64
+ "loss": 3.1414,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 4.92,
69
+ "learning_rate": 4.495e-05,
70
+ "loss": 3.0995,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 5.46,
75
+ "learning_rate": 4.995e-05,
76
+ "loss": 3.0482,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 5.46,
81
+ "eval_loss": 2.9591026306152344,
82
+ "eval_runtime": 128.3332,
83
+ "eval_samples_per_second": 20.416,
84
+ "eval_steps_per_second": 2.556,
85
  "eval_wer": 1.0,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 6.01,
90
+ "learning_rate": 5.495e-05,
91
+ "loss": 2.7368,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 6.56,
96
+ "learning_rate": 5.995000000000001e-05,
97
+ "loss": 2.0079,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 7.1,
102
+ "learning_rate": 6.494999999999999e-05,
103
+ "loss": 1.6588,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 7.65,
108
+ "learning_rate": 6.995e-05,
109
+ "loss": 1.5644,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 8.2,
114
+ "learning_rate": 7.495e-05,
115
+ "loss": 1.4767,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 8.2,
120
+ "eval_loss": 0.47794264554977417,
121
+ "eval_runtime": 128.1544,
122
+ "eval_samples_per_second": 20.444,
123
+ "eval_steps_per_second": 2.559,
124
+ "eval_wer": 0.5776572037593256,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 8.74,
129
+ "learning_rate": 7.995e-05,
130
+ "loss": 1.4421,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 9.29,
135
+ "learning_rate": 8.495e-05,
136
+ "loss": 1.4009,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 9.84,
141
+ "learning_rate": 8.995e-05,
142
+ "loss": 1.3682,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 10.38,
147
+ "learning_rate": 9.495e-05,
148
+ "loss": 1.3377,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 10.93,
153
+ "learning_rate": 9.995e-05,
154
+ "loss": 1.3152,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 10.93,
159
+ "eval_loss": 0.36967846751213074,
160
+ "eval_runtime": 127.926,
161
+ "eval_samples_per_second": 20.481,
162
+ "eval_steps_per_second": 2.564,
163
+ "eval_wer": 0.49384749539773276,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 11.48,
168
+ "learning_rate": 9.939877300613497e-05,
169
+ "loss": 1.3009,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 12.02,
174
+ "learning_rate": 9.878527607361964e-05,
175
+ "loss": 1.2799,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 12.57,
180
+ "learning_rate": 9.81717791411043e-05,
181
+ "loss": 1.2559,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 13.11,
186
+ "learning_rate": 9.756441717791411e-05,
187
+ "loss": 1.2379,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 13.66,
192
+ "learning_rate": 9.695092024539878e-05,
193
+ "loss": 1.2246,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 13.66,
198
+ "eval_loss": 0.3084094822406769,
199
+ "eval_runtime": 127.6781,
200
+ "eval_samples_per_second": 20.52,
201
+ "eval_steps_per_second": 2.569,
202
+ "eval_wer": 0.44593547136905337,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 14.21,
207
+ "learning_rate": 9.633742331288344e-05,
208
+ "loss": 1.2209,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 14.75,
213
+ "learning_rate": 9.57239263803681e-05,
214
+ "loss": 1.1972,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 15.3,
219
+ "learning_rate": 9.511042944785277e-05,
220
+ "loss": 1.1885,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 15.85,
225
+ "learning_rate": 9.449693251533743e-05,
226
+ "loss": 1.1807,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 16.39,
231
+ "learning_rate": 9.388343558282209e-05,
232
+ "loss": 1.1781,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 16.39,
237
+ "eval_loss": 0.2842142581939697,
238
+ "eval_runtime": 128.3875,
239
+ "eval_samples_per_second": 20.407,
240
+ "eval_steps_per_second": 2.555,
241
+ "eval_wer": 0.4154151729483577,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 16.94,
246
+ "learning_rate": 9.326993865030675e-05,
247
+ "loss": 1.1757,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 17.49,
252
+ "learning_rate": 9.265644171779141e-05,
253
+ "loss": 1.155,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 18.03,
258
+ "learning_rate": 9.204294478527608e-05,
259
+ "loss": 1.1455,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 18.58,
264
+ "learning_rate": 9.142944785276074e-05,
265
+ "loss": 1.1376,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 19.13,
270
+ "learning_rate": 9.081595092024541e-05,
271
+ "loss": 1.1351,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 19.13,
276
+ "eval_loss": 0.26151829957962036,
277
+ "eval_runtime": 127.9176,
278
+ "eval_samples_per_second": 20.482,
279
+ "eval_steps_per_second": 2.564,
280
+ "eval_wer": 0.3929367309369247,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 19.67,
285
+ "learning_rate": 9.020245398773006e-05,
286
+ "loss": 1.1262,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 20.22,
291
+ "learning_rate": 8.958895705521472e-05,
292
+ "loss": 1.1265,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 20.77,
297
+ "learning_rate": 8.897546012269939e-05,
298
+ "loss": 1.1033,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 21.31,
303
+ "learning_rate": 8.836196319018405e-05,
304
+ "loss": 1.1016,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 21.86,
309
+ "learning_rate": 8.774846625766872e-05,
310
+ "loss": 1.1052,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 21.86,
315
+ "eval_loss": 0.24618586897850037,
316
+ "eval_runtime": 128.2681,
317
+ "eval_samples_per_second": 20.426,
318
+ "eval_steps_per_second": 2.557,
319
+ "eval_wer": 0.3746729968026354,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 22.4,
324
+ "learning_rate": 8.714110429447854e-05,
325
+ "loss": 1.0964,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 22.95,
330
+ "learning_rate": 8.652760736196319e-05,
331
+ "loss": 1.0848,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 23.5,
336
+ "learning_rate": 8.591411042944786e-05,
337
+ "loss": 1.0714,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 24.04,
342
+ "learning_rate": 8.530061349693252e-05,
343
+ "loss": 1.0696,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 24.59,
348
+ "learning_rate": 8.468711656441717e-05,
349
+ "loss": 1.0711,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 24.59,
354
+ "eval_loss": 0.23661433160305023,
355
+ "eval_runtime": 128.1823,
356
+ "eval_samples_per_second": 20.44,
357
+ "eval_steps_per_second": 2.559,
358
+ "eval_wer": 0.36522623776765817,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 25.14,
363
+ "learning_rate": 8.407361963190185e-05,
364
+ "loss": 1.0519,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 25.68,
369
+ "learning_rate": 8.346012269938652e-05,
370
+ "loss": 1.0673,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 26.23,
375
+ "learning_rate": 8.284662576687117e-05,
376
+ "loss": 1.0606,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 26.78,
381
+ "learning_rate": 8.223312883435583e-05,
382
+ "loss": 1.0711,
383
  "step": 4900
384
  },
385
  {
386
  "epoch": 27.32,
387
+ "learning_rate": 8.16196319018405e-05,
388
+ "loss": 1.035,
389
  "step": 5000
390
  },
391
  {
392
  "epoch": 27.32,
393
+ "eval_loss": 0.22680768370628357,
394
+ "eval_runtime": 127.7363,
395
+ "eval_samples_per_second": 20.511,
396
+ "eval_steps_per_second": 2.568,
397
+ "eval_wer": 0.3556825888964248,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 27.87,
402
+ "learning_rate": 8.100613496932515e-05,
403
+ "loss": 1.0428,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 28.42,
408
+ "learning_rate": 8.039877300613497e-05,
409
+ "loss": 1.0322,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 28.96,
414
+ "learning_rate": 7.978527607361964e-05,
415
+ "loss": 1.0291,
416
  "step": 5300
417
  },
418
  {
419
  "epoch": 29.51,
420
+ "learning_rate": 7.91717791411043e-05,
421
+ "loss": 1.0258,
422
  "step": 5400
423
  },
424
  {
425
  "epoch": 30.05,
426
+ "learning_rate": 7.855828220858897e-05,
427
+ "loss": 1.0277,
428
  "step": 5500
429
  },
430
  {
431
  "epoch": 30.05,
432
+ "eval_loss": 0.22434431314468384,
433
+ "eval_runtime": 127.9511,
434
+ "eval_samples_per_second": 20.477,
435
+ "eval_steps_per_second": 2.563,
436
+ "eval_wer": 0.3449762619901172,
437
  "step": 5500
438
  },
439
  {
440
  "epoch": 30.6,
441
+ "learning_rate": 7.794478527607363e-05,
442
+ "loss": 1.0208,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 31.15,
447
+ "learning_rate": 7.733128834355828e-05,
448
+ "loss": 1.0252,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 31.69,
453
+ "learning_rate": 7.671779141104295e-05,
454
+ "loss": 1.0157,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 32.24,
459
+ "learning_rate": 7.610429447852761e-05,
460
+ "loss": 1.0094,
461
  "step": 5900
462
  },
463
  {
464
  "epoch": 32.79,
465
+ "learning_rate": 7.549079754601228e-05,
466
+ "loss": 1.002,
467
  "step": 6000
468
  },
469
  {
470
  "epoch": 32.79,
471
+ "eval_loss": 0.22044695913791656,
472
+ "eval_runtime": 127.6026,
473
+ "eval_samples_per_second": 20.532,
474
+ "eval_steps_per_second": 2.57,
475
+ "eval_wer": 0.3388722023059781,
476
  "step": 6000
477
  },
478
  {
479
  "epoch": 33.33,
480
+ "learning_rate": 7.487730061349694e-05,
481
+ "loss": 0.996,
482
  "step": 6100
483
  },
484
  {
485
  "epoch": 33.88,
486
+ "learning_rate": 7.42638036809816e-05,
487
+ "loss": 1.002,
488
  "step": 6200
489
  },
490
  {
491
  "epoch": 34.43,
492
+ "learning_rate": 7.365030674846626e-05,
493
+ "loss": 0.9845,
494
  "step": 6300
495
  },
496
  {
497
  "epoch": 34.97,
498
+ "learning_rate": 7.303680981595092e-05,
499
+ "loss": 0.9958,
500
  "step": 6400
501
  },
502
  {
503
  "epoch": 35.52,
504
+ "learning_rate": 7.242331288343559e-05,
505
+ "loss": 0.9837,
506
  "step": 6500
507
  },
508
  {
509
  "epoch": 35.52,
510
+ "eval_loss": 0.2156379520893097,
511
+ "eval_runtime": 127.7813,
512
+ "eval_samples_per_second": 20.504,
513
+ "eval_steps_per_second": 2.567,
514
+ "eval_wer": 0.33489971901947485,
515
  "step": 6500
516
  },
517
  {
518
  "epoch": 36.07,
519
+ "learning_rate": 7.180981595092025e-05,
520
+ "loss": 1.0028,
521
  "step": 6600
522
  },
523
  {
524
  "epoch": 36.61,
525
+ "learning_rate": 7.119631901840491e-05,
526
+ "loss": 0.9722,
527
  "step": 6700
528
  },
529
  {
530
  "epoch": 37.16,
531
+ "learning_rate": 7.058282208588958e-05,
532
+ "loss": 0.9784,
533
  "step": 6800
534
  },
535
  {
536
  "epoch": 37.7,
537
+ "learning_rate": 6.996932515337423e-05,
538
+ "loss": 0.9822,
539
  "step": 6900
540
  },
541
  {
542
  "epoch": 38.25,
543
+ "learning_rate": 6.93558282208589e-05,
544
+ "loss": 0.9773,
545
  "step": 7000
546
  },
547
  {
548
  "epoch": 38.25,
549
+ "eval_loss": 0.21265123784542084,
550
+ "eval_runtime": 128.3357,
551
+ "eval_samples_per_second": 20.415,
552
+ "eval_steps_per_second": 2.556,
553
+ "eval_wer": 0.3288925491715919,
554
  "step": 7000
555
  },
556
  {
557
  "epoch": 38.8,
558
+ "learning_rate": 6.874233128834356e-05,
559
+ "loss": 0.9649,
560
  "step": 7100
561
  },
562
  {
563
  "epoch": 39.34,
564
+ "learning_rate": 6.812883435582822e-05,
565
+ "loss": 0.9728,
566
  "step": 7200
567
  },
568
  {
569
  "epoch": 39.89,
570
+ "learning_rate": 6.751533742331289e-05,
571
+ "loss": 0.9663,
572
  "step": 7300
573
  },
574
  {
575
  "epoch": 40.44,
576
+ "learning_rate": 6.690184049079755e-05,
577
+ "loss": 0.9762,
578
  "step": 7400
579
  },
580
  {
581
  "epoch": 40.98,
582
+ "learning_rate": 6.629447852760736e-05,
583
+ "loss": 0.9807,
584
  "step": 7500
585
  },
586
  {
587
  "epoch": 40.98,
588
+ "eval_loss": 0.21417580544948578,
589
+ "eval_runtime": 128.3216,
590
+ "eval_samples_per_second": 20.417,
591
+ "eval_steps_per_second": 2.556,
592
+ "eval_wer": 0.32743920162774925,
593
  "step": 7500
594
  },
595
  {
596
  "epoch": 41.53,
597
+ "learning_rate": 6.568098159509203e-05,
598
+ "loss": 0.9647,
599
  "step": 7600
600
  },
601
  {
602
  "epoch": 42.08,
603
+ "learning_rate": 6.506748466257669e-05,
604
+ "loss": 0.9748,
605
  "step": 7700
606
  },
607
  {
608
  "epoch": 42.62,
609
+ "learning_rate": 6.445398773006134e-05,
610
+ "loss": 0.9484,
611
  "step": 7800
612
  },
613
  {
614
  "epoch": 43.17,
615
+ "learning_rate": 6.384049079754602e-05,
616
+ "loss": 0.9558,
617
  "step": 7900
618
  },
619
  {
620
  "epoch": 43.72,
621
+ "learning_rate": 6.322699386503069e-05,
622
+ "loss": 0.9582,
623
  "step": 8000
624
  },
625
  {
626
  "epoch": 43.72,
627
+ "eval_loss": 0.20038354396820068,
628
+ "eval_runtime": 127.7709,
629
+ "eval_samples_per_second": 20.505,
630
+ "eval_steps_per_second": 2.567,
631
+ "eval_wer": 0.314165294060653,
632
  "step": 8000
633
  },
634
  {
635
  "epoch": 44.26,
636
+ "learning_rate": 6.261349693251534e-05,
637
+ "loss": 0.949,
638
  "step": 8100
639
  },
640
  {
641
  "epoch": 44.81,
642
+ "learning_rate": 6.2e-05,
643
+ "loss": 0.9491,
644
  "step": 8200
645
  },
646
  {
647
  "epoch": 45.36,
648
+ "learning_rate": 6.138650306748467e-05,
649
+ "loss": 0.9447,
650
  "step": 8300
651
  },
652
  {
653
  "epoch": 45.9,
654
+ "learning_rate": 6.0773006134969325e-05,
655
+ "loss": 0.9368,
656
  "step": 8400
657
  },
658
  {
659
  "epoch": 46.45,
660
+ "learning_rate": 6.015950920245399e-05,
661
+ "loss": 0.9548,
662
  "step": 8500
663
  },
664
  {
665
  "epoch": 46.45,
666
+ "eval_loss": 0.20219053328037262,
667
+ "eval_runtime": 127.6869,
668
+ "eval_samples_per_second": 20.519,
669
+ "eval_steps_per_second": 2.569,
670
+ "eval_wer": 0.30500920453444436,
671
  "step": 8500
672
  },
673
  {
674
  "epoch": 46.99,
675
+ "learning_rate": 5.9546012269938655e-05,
676
+ "loss": 0.9407,
677
  "step": 8600
678
  },
679
  {
680
  "epoch": 47.54,
681
+ "learning_rate": 5.893251533742331e-05,
682
+ "loss": 0.9312,
683
  "step": 8700
684
  },
685
  {
686
  "epoch": 48.09,
687
+ "learning_rate": 5.831901840490798e-05,
688
+ "loss": 0.9446,
689
  "step": 8800
690
  },
691
  {
692
  "epoch": 48.63,
693
+ "learning_rate": 5.770552147239264e-05,
694
+ "loss": 0.9225,
695
  "step": 8900
696
  },
697
  {
698
  "epoch": 49.18,
699
+ "learning_rate": 5.70920245398773e-05,
700
+ "loss": 0.9251,
701
  "step": 9000
702
  },
703
  {
704
  "epoch": 49.18,
705
+ "eval_loss": 0.2018980085849762,
706
+ "eval_runtime": 130.6299,
707
+ "eval_samples_per_second": 20.057,
708
+ "eval_steps_per_second": 2.511,
709
+ "eval_wer": 0.3035074120724736,
710
  "step": 9000
711
  },
712
  {
713
  "epoch": 49.73,
714
+ "learning_rate": 5.6478527607361965e-05,
715
+ "loss": 0.9251,
716
  "step": 9100
717
  },
718
  {
719
+ "epoch": 50.27,
720
+ "learning_rate": 5.586503067484663e-05,
721
+ "loss": 0.9172,
722
+ "step": 9200
723
+ },
724
+ {
725
+ "epoch": 50.82,
726
+ "learning_rate": 5.5251533742331294e-05,
727
+ "loss": 0.9103,
728
+ "step": 9300
729
+ },
730
+ {
731
+ "epoch": 51.37,
732
+ "learning_rate": 5.463803680981595e-05,
733
+ "loss": 0.9133,
734
+ "step": 9400
735
+ },
736
+ {
737
+ "epoch": 51.91,
738
+ "learning_rate": 5.402453987730062e-05,
739
+ "loss": 0.9103,
740
+ "step": 9500
741
+ },
742
+ {
743
+ "epoch": 51.91,
744
+ "eval_loss": 0.1963759958744049,
745
+ "eval_runtime": 127.8377,
746
+ "eval_samples_per_second": 20.495,
747
+ "eval_steps_per_second": 2.566,
748
+ "eval_wer": 0.30210250944675904,
749
+ "step": 9500
750
+ },
751
+ {
752
+ "epoch": 52.46,
753
+ "learning_rate": 5.341104294478528e-05,
754
+ "loss": 0.9109,
755
+ "step": 9600
756
+ },
757
+ {
758
+ "epoch": 53.01,
759
+ "learning_rate": 5.279754601226994e-05,
760
+ "loss": 0.9153,
761
+ "step": 9700
762
+ },
763
+ {
764
+ "epoch": 53.55,
765
+ "learning_rate": 5.2184049079754604e-05,
766
+ "loss": 0.9113,
767
+ "step": 9800
768
+ },
769
+ {
770
+ "epoch": 54.1,
771
+ "learning_rate": 5.157055214723927e-05,
772
+ "loss": 0.9181,
773
+ "step": 9900
774
+ },
775
+ {
776
+ "epoch": 54.64,
777
+ "learning_rate": 5.095705521472393e-05,
778
+ "loss": 0.915,
779
+ "step": 10000
780
+ },
781
+ {
782
+ "epoch": 54.64,
783
+ "eval_loss": 0.19702854752540588,
784
+ "eval_runtime": 128.314,
785
+ "eval_samples_per_second": 20.419,
786
+ "eval_steps_per_second": 2.556,
787
+ "eval_wer": 0.30316829764557696,
788
+ "step": 10000
789
+ },
790
+ {
791
+ "epoch": 55.19,
792
+ "learning_rate": 5.034969325153375e-05,
793
+ "loss": 0.8971,
794
+ "step": 10100
795
+ },
796
+ {
797
+ "epoch": 55.74,
798
+ "learning_rate": 4.973619631901841e-05,
799
+ "loss": 0.9066,
800
+ "step": 10200
801
+ },
802
+ {
803
+ "epoch": 56.28,
804
+ "learning_rate": 4.9122699386503065e-05,
805
+ "loss": 0.8993,
806
+ "step": 10300
807
+ },
808
+ {
809
+ "epoch": 56.83,
810
+ "learning_rate": 4.850920245398774e-05,
811
+ "loss": 0.8933,
812
+ "step": 10400
813
+ },
814
+ {
815
+ "epoch": 57.38,
816
+ "learning_rate": 4.7895705521472395e-05,
817
+ "loss": 0.8962,
818
+ "step": 10500
819
+ },
820
+ {
821
+ "epoch": 57.38,
822
+ "eval_loss": 0.2006961703300476,
823
+ "eval_runtime": 128.3801,
824
+ "eval_samples_per_second": 20.408,
825
+ "eval_steps_per_second": 2.555,
826
+ "eval_wer": 0.30462164518941964,
827
+ "step": 10500
828
+ },
829
+ {
830
+ "epoch": 57.92,
831
+ "learning_rate": 4.728220858895705e-05,
832
+ "loss": 0.8829,
833
+ "step": 10600
834
+ },
835
+ {
836
+ "epoch": 58.47,
837
+ "learning_rate": 4.6668711656441724e-05,
838
+ "loss": 0.8932,
839
+ "step": 10700
840
+ },
841
+ {
842
+ "epoch": 59.02,
843
+ "learning_rate": 4.605521472392638e-05,
844
+ "loss": 0.8986,
845
+ "step": 10800
846
+ },
847
+ {
848
+ "epoch": 59.56,
849
+ "learning_rate": 4.544171779141104e-05,
850
+ "loss": 0.8892,
851
+ "step": 10900
852
+ },
853
+ {
854
+ "epoch": 60.11,
855
+ "learning_rate": 4.482822085889571e-05,
856
+ "loss": 0.8729,
857
+ "step": 11000
858
+ },
859
+ {
860
+ "epoch": 60.11,
861
+ "eval_loss": 0.19668185710906982,
862
+ "eval_runtime": 128.5612,
863
+ "eval_samples_per_second": 20.379,
864
+ "eval_steps_per_second": 2.551,
865
+ "eval_wer": 0.2942059877918806,
866
+ "step": 11000
867
+ },
868
+ {
869
+ "epoch": 60.66,
870
+ "learning_rate": 4.421472392638037e-05,
871
+ "loss": 0.876,
872
+ "step": 11100
873
+ },
874
+ {
875
+ "epoch": 61.2,
876
+ "learning_rate": 4.3601226993865034e-05,
877
+ "loss": 0.8759,
878
+ "step": 11200
879
+ },
880
+ {
881
+ "epoch": 61.75,
882
+ "learning_rate": 4.29877300613497e-05,
883
+ "loss": 0.8813,
884
+ "step": 11300
885
+ },
886
+ {
887
+ "epoch": 62.3,
888
+ "learning_rate": 4.237423312883436e-05,
889
+ "loss": 0.8684,
890
+ "step": 11400
891
+ },
892
+ {
893
+ "epoch": 62.84,
894
+ "learning_rate": 4.176073619631902e-05,
895
+ "loss": 0.8744,
896
+ "step": 11500
897
+ },
898
+ {
899
+ "epoch": 62.84,
900
+ "eval_loss": 0.19520752131938934,
901
+ "eval_runtime": 127.705,
902
+ "eval_samples_per_second": 20.516,
903
+ "eval_steps_per_second": 2.568,
904
+ "eval_wer": 0.2885379323708943,
905
+ "step": 11500
906
+ },
907
+ {
908
+ "epoch": 63.39,
909
+ "learning_rate": 4.1147239263803686e-05,
910
+ "loss": 0.8665,
911
+ "step": 11600
912
+ },
913
+ {
914
+ "epoch": 63.93,
915
+ "learning_rate": 4.0533742331288344e-05,
916
+ "loss": 0.8757,
917
+ "step": 11700
918
+ },
919
+ {
920
+ "epoch": 64.48,
921
+ "learning_rate": 3.992024539877301e-05,
922
+ "loss": 0.8694,
923
+ "step": 11800
924
+ },
925
+ {
926
+ "epoch": 65.03,
927
+ "learning_rate": 3.930674846625767e-05,
928
+ "loss": 0.8578,
929
+ "step": 11900
930
+ },
931
+ {
932
+ "epoch": 65.57,
933
+ "learning_rate": 3.869938650306748e-05,
934
+ "loss": 0.874,
935
+ "step": 12000
936
+ },
937
+ {
938
+ "epoch": 65.57,
939
+ "eval_loss": 0.18939977884292603,
940
+ "eval_runtime": 128.0037,
941
+ "eval_samples_per_second": 20.468,
942
+ "eval_steps_per_second": 2.562,
943
+ "eval_wer": 0.28950683073345607,
944
+ "step": 12000
945
+ },
946
+ {
947
+ "epoch": 66.12,
948
+ "learning_rate": 3.808588957055215e-05,
949
+ "loss": 0.8628,
950
+ "step": 12100
951
+ },
952
+ {
953
+ "epoch": 66.67,
954
+ "learning_rate": 3.747239263803681e-05,
955
+ "loss": 0.8564,
956
+ "step": 12200
957
+ },
958
+ {
959
+ "epoch": 67.21,
960
+ "learning_rate": 3.685889570552147e-05,
961
+ "loss": 0.8502,
962
+ "step": 12300
963
+ },
964
+ {
965
+ "epoch": 67.76,
966
+ "learning_rate": 3.6245398773006135e-05,
967
+ "loss": 0.8521,
968
+ "step": 12400
969
+ },
970
+ {
971
+ "epoch": 68.31,
972
+ "learning_rate": 3.56319018404908e-05,
973
+ "loss": 0.8457,
974
+ "step": 12500
975
+ },
976
+ {
977
+ "epoch": 68.31,
978
+ "eval_loss": 0.18946239352226257,
979
+ "eval_runtime": 128.0623,
980
+ "eval_samples_per_second": 20.459,
981
+ "eval_steps_per_second": 2.561,
982
+ "eval_wer": 0.28282143203177984,
983
+ "step": 12500
984
+ },
985
+ {
986
+ "epoch": 68.85,
987
+ "learning_rate": 3.501840490797546e-05,
988
+ "loss": 0.8624,
989
+ "step": 12600
990
+ },
991
+ {
992
+ "epoch": 69.4,
993
+ "learning_rate": 3.440490797546013e-05,
994
+ "loss": 0.8394,
995
+ "step": 12700
996
+ },
997
+ {
998
+ "epoch": 69.95,
999
+ "learning_rate": 3.379141104294479e-05,
1000
+ "loss": 0.8381,
1001
+ "step": 12800
1002
+ },
1003
+ {
1004
+ "epoch": 70.49,
1005
+ "learning_rate": 3.3177914110429445e-05,
1006
+ "loss": 0.8431,
1007
+ "step": 12900
1008
+ },
1009
+ {
1010
+ "epoch": 71.04,
1011
+ "learning_rate": 3.2564417177914117e-05,
1012
+ "loss": 0.8519,
1013
+ "step": 13000
1014
+ },
1015
+ {
1016
+ "epoch": 71.04,
1017
+ "eval_loss": 0.19119836390018463,
1018
+ "eval_runtime": 127.8936,
1019
+ "eval_samples_per_second": 20.486,
1020
+ "eval_steps_per_second": 2.565,
1021
+ "eval_wer": 0.28747214417207634,
1022
+ "step": 13000
1023
+ },
1024
+ {
1025
+ "epoch": 71.58,
1026
+ "learning_rate": 3.1950920245398774e-05,
1027
+ "loss": 0.8463,
1028
+ "step": 13100
1029
+ },
1030
+ {
1031
+ "epoch": 72.13,
1032
+ "learning_rate": 3.133742331288343e-05,
1033
+ "loss": 0.8492,
1034
+ "step": 13200
1035
+ },
1036
+ {
1037
+ "epoch": 72.68,
1038
+ "learning_rate": 3.0723926380368104e-05,
1039
+ "loss": 0.8354,
1040
+ "step": 13300
1041
+ },
1042
+ {
1043
+ "epoch": 73.22,
1044
+ "learning_rate": 3.0110429447852762e-05,
1045
+ "loss": 0.8408,
1046
+ "step": 13400
1047
+ },
1048
+ {
1049
+ "epoch": 73.77,
1050
+ "learning_rate": 2.9496932515337423e-05,
1051
+ "loss": 0.8301,
1052
+ "step": 13500
1053
+ },
1054
+ {
1055
+ "epoch": 73.77,
1056
+ "eval_loss": 0.18781304359436035,
1057
+ "eval_runtime": 127.7845,
1058
+ "eval_samples_per_second": 20.503,
1059
+ "eval_steps_per_second": 2.567,
1060
+ "eval_wer": 0.2760391434938475,
1061
+ "step": 13500
1062
+ },
1063
+ {
1064
+ "epoch": 74.32,
1065
+ "learning_rate": 2.8883435582822088e-05,
1066
+ "loss": 0.8425,
1067
+ "step": 13600
1068
+ },
1069
+ {
1070
+ "epoch": 74.86,
1071
+ "learning_rate": 2.826993865030675e-05,
1072
+ "loss": 0.8416,
1073
+ "step": 13700
1074
+ },
1075
+ {
1076
+ "epoch": 75.41,
1077
+ "learning_rate": 2.7656441717791414e-05,
1078
+ "loss": 0.8269,
1079
+ "step": 13800
1080
+ },
1081
+ {
1082
+ "epoch": 75.96,
1083
+ "learning_rate": 2.7042944785276075e-05,
1084
+ "loss": 0.8212,
1085
+ "step": 13900
1086
+ },
1087
+ {
1088
+ "epoch": 76.5,
1089
+ "learning_rate": 2.6429447852760736e-05,
1090
+ "loss": 0.8226,
1091
+ "step": 14000
1092
+ },
1093
+ {
1094
+ "epoch": 76.5,
1095
+ "eval_loss": 0.18075355887413025,
1096
+ "eval_runtime": 127.7671,
1097
+ "eval_samples_per_second": 20.506,
1098
+ "eval_steps_per_second": 2.567,
1099
+ "eval_wer": 0.2701288634822207,
1100
+ "step": 14000
1101
+ },
1102
+ {
1103
+ "epoch": 77.05,
1104
+ "learning_rate": 2.58159509202454e-05,
1105
+ "loss": 0.8265,
1106
+ "step": 14100
1107
+ },
1108
+ {
1109
+ "epoch": 77.6,
1110
+ "learning_rate": 2.5202453987730063e-05,
1111
+ "loss": 0.8279,
1112
+ "step": 14200
1113
+ },
1114
+ {
1115
+ "epoch": 78.14,
1116
+ "learning_rate": 2.4588957055214727e-05,
1117
+ "loss": 0.8273,
1118
+ "step": 14300
1119
+ },
1120
+ {
1121
+ "epoch": 78.69,
1122
+ "learning_rate": 2.3975460122699385e-05,
1123
+ "loss": 0.8067,
1124
+ "step": 14400
1125
+ },
1126
+ {
1127
+ "epoch": 79.23,
1128
+ "learning_rate": 2.33680981595092e-05,
1129
+ "loss": 0.8071,
1130
+ "step": 14500
1131
+ },
1132
+ {
1133
+ "epoch": 79.23,
1134
+ "eval_loss": 0.18486912548542023,
1135
+ "eval_runtime": 127.9551,
1136
+ "eval_samples_per_second": 20.476,
1137
+ "eval_steps_per_second": 2.563,
1138
+ "eval_wer": 0.27410134676872394,
1139
+ "step": 14500
1140
+ },
1141
+ {
1142
+ "epoch": 79.78,
1143
+ "learning_rate": 2.2754601226993866e-05,
1144
+ "loss": 0.8231,
1145
+ "step": 14600
1146
+ },
1147
+ {
1148
+ "epoch": 80.33,
1149
+ "learning_rate": 2.214110429447853e-05,
1150
+ "loss": 0.808,
1151
+ "step": 14700
1152
+ },
1153
+ {
1154
+ "epoch": 80.87,
1155
+ "learning_rate": 2.1527607361963192e-05,
1156
+ "loss": 0.8107,
1157
+ "step": 14800
1158
+ },
1159
+ {
1160
+ "epoch": 81.42,
1161
+ "learning_rate": 2.0914110429447853e-05,
1162
+ "loss": 0.7966,
1163
+ "step": 14900
1164
+ },
1165
+ {
1166
+ "epoch": 81.97,
1167
+ "learning_rate": 2.0300613496932515e-05,
1168
+ "loss": 0.7999,
1169
+ "step": 15000
1170
+ },
1171
+ {
1172
+ "epoch": 81.97,
1173
+ "eval_loss": 0.18083913624286652,
1174
+ "eval_runtime": 127.4398,
1175
+ "eval_samples_per_second": 20.559,
1176
+ "eval_steps_per_second": 2.574,
1177
+ "eval_wer": 0.2717275457804476,
1178
+ "step": 15000
1179
+ },
1180
+ {
1181
+ "epoch": 82.51,
1182
+ "learning_rate": 1.968711656441718e-05,
1183
+ "loss": 0.8171,
1184
+ "step": 15100
1185
+ },
1186
+ {
1187
+ "epoch": 83.06,
1188
+ "learning_rate": 1.907361963190184e-05,
1189
+ "loss": 0.8034,
1190
+ "step": 15200
1191
+ },
1192
+ {
1193
+ "epoch": 83.61,
1194
+ "learning_rate": 1.8460122699386502e-05,
1195
+ "loss": 0.8086,
1196
+ "step": 15300
1197
+ },
1198
+ {
1199
+ "epoch": 84.15,
1200
+ "learning_rate": 1.7846625766871167e-05,
1201
+ "loss": 0.8005,
1202
+ "step": 15400
1203
+ },
1204
+ {
1205
+ "epoch": 84.7,
1206
+ "learning_rate": 1.723312883435583e-05,
1207
+ "loss": 0.7947,
1208
+ "step": 15500
1209
+ },
1210
+ {
1211
+ "epoch": 84.7,
1212
+ "eval_loss": 0.1820572018623352,
1213
+ "eval_runtime": 127.1268,
1214
+ "eval_samples_per_second": 20.609,
1215
+ "eval_steps_per_second": 2.58,
1216
+ "eval_wer": 0.2715822110260634,
1217
+ "step": 15500
1218
+ },
1219
+ {
1220
+ "epoch": 85.25,
1221
+ "learning_rate": 1.661963190184049e-05,
1222
+ "loss": 0.7947,
1223
+ "step": 15600
1224
+ },
1225
+ {
1226
+ "epoch": 85.79,
1227
+ "learning_rate": 1.6006134969325154e-05,
1228
+ "loss": 0.7892,
1229
+ "step": 15700
1230
+ },
1231
+ {
1232
+ "epoch": 86.34,
1233
+ "learning_rate": 1.539263803680982e-05,
1234
+ "loss": 0.7969,
1235
+ "step": 15800
1236
+ },
1237
+ {
1238
+ "epoch": 86.89,
1239
+ "learning_rate": 1.477914110429448e-05,
1240
+ "loss": 0.801,
1241
+ "step": 15900
1242
+ },
1243
+ {
1244
+ "epoch": 87.43,
1245
+ "learning_rate": 1.4165644171779141e-05,
1246
+ "loss": 0.7783,
1247
+ "step": 16000
1248
+ },
1249
+ {
1250
+ "epoch": 87.43,
1251
+ "eval_loss": 0.18241995573043823,
1252
+ "eval_runtime": 127.0573,
1253
+ "eval_samples_per_second": 20.621,
1254
+ "eval_steps_per_second": 2.582,
1255
+ "eval_wer": 0.26610793527758936,
1256
+ "step": 16000
1257
+ },
1258
+ {
1259
+ "epoch": 87.98,
1260
+ "learning_rate": 1.3552147239263804e-05,
1261
+ "loss": 0.7915,
1262
+ "step": 16100
1263
+ },
1264
+ {
1265
+ "epoch": 88.52,
1266
+ "learning_rate": 1.2938650306748467e-05,
1267
+ "loss": 0.7862,
1268
+ "step": 16200
1269
+ },
1270
+ {
1271
+ "epoch": 89.07,
1272
+ "learning_rate": 1.232515337423313e-05,
1273
+ "loss": 0.7816,
1274
+ "step": 16300
1275
+ },
1276
+ {
1277
+ "epoch": 89.62,
1278
+ "learning_rate": 1.1711656441717792e-05,
1279
+ "loss": 0.7816,
1280
+ "step": 16400
1281
+ },
1282
+ {
1283
+ "epoch": 90.16,
1284
+ "learning_rate": 1.1098159509202455e-05,
1285
+ "loss": 0.7729,
1286
+ "step": 16500
1287
+ },
1288
+ {
1289
+ "epoch": 90.16,
1290
+ "eval_loss": 0.17727895081043243,
1291
+ "eval_runtime": 128.0807,
1292
+ "eval_samples_per_second": 20.456,
1293
+ "eval_steps_per_second": 2.561,
1294
+ "eval_wer": 0.2638794690436973,
1295
+ "step": 16500
1296
+ },
1297
+ {
1298
+ "epoch": 90.71,
1299
+ "learning_rate": 1.0484662576687116e-05,
1300
+ "loss": 0.7778,
1301
+ "step": 16600
1302
+ },
1303
+ {
1304
+ "epoch": 91.26,
1305
+ "learning_rate": 9.87116564417178e-06,
1306
+ "loss": 0.7844,
1307
+ "step": 16700
1308
+ },
1309
+ {
1310
+ "epoch": 91.8,
1311
+ "learning_rate": 9.257668711656442e-06,
1312
+ "loss": 0.7842,
1313
+ "step": 16800
1314
+ },
1315
+ {
1316
+ "epoch": 92.35,
1317
+ "learning_rate": 8.644171779141105e-06,
1318
+ "loss": 0.7755,
1319
+ "step": 16900
1320
+ },
1321
+ {
1322
+ "epoch": 92.9,
1323
+ "learning_rate": 8.030674846625766e-06,
1324
+ "loss": 0.7759,
1325
+ "step": 17000
1326
+ },
1327
+ {
1328
+ "epoch": 92.9,
1329
+ "eval_loss": 0.17666833102703094,
1330
+ "eval_runtime": 128.2582,
1331
+ "eval_samples_per_second": 20.428,
1332
+ "eval_steps_per_second": 2.557,
1333
+ "eval_wer": 0.26291057068113555,
1334
+ "step": 17000
1335
+ },
1336
+ {
1337
+ "epoch": 93.44,
1338
+ "learning_rate": 7.417177914110429e-06,
1339
+ "loss": 0.7672,
1340
+ "step": 17100
1341
+ },
1342
+ {
1343
+ "epoch": 93.99,
1344
+ "learning_rate": 6.8036809815950924e-06,
1345
+ "loss": 0.7813,
1346
+ "step": 17200
1347
+ },
1348
+ {
1349
+ "epoch": 94.54,
1350
+ "learning_rate": 6.1963190184049085e-06,
1351
+ "loss": 0.7781,
1352
+ "step": 17300
1353
+ },
1354
+ {
1355
+ "epoch": 95.08,
1356
+ "learning_rate": 5.582822085889571e-06,
1357
+ "loss": 0.7711,
1358
+ "step": 17400
1359
+ },
1360
+ {
1361
+ "epoch": 95.63,
1362
+ "learning_rate": 4.969325153374233e-06,
1363
+ "loss": 0.7713,
1364
+ "step": 17500
1365
+ },
1366
+ {
1367
+ "epoch": 95.63,
1368
+ "eval_loss": 0.17804710566997528,
1369
+ "eval_runtime": 127.8273,
1370
+ "eval_samples_per_second": 20.496,
1371
+ "eval_steps_per_second": 2.566,
1372
+ "eval_wer": 0.26208700707295807,
1373
+ "step": 17500
1374
+ },
1375
+ {
1376
+ "epoch": 96.17,
1377
+ "learning_rate": 4.355828220858896e-06,
1378
+ "loss": 0.7789,
1379
+ "step": 17600
1380
+ },
1381
+ {
1382
+ "epoch": 96.72,
1383
+ "learning_rate": 3.7423312883435584e-06,
1384
+ "loss": 0.7732,
1385
+ "step": 17700
1386
+ },
1387
+ {
1388
+ "epoch": 97.27,
1389
+ "learning_rate": 3.128834355828221e-06,
1390
+ "loss": 0.7688,
1391
+ "step": 17800
1392
+ },
1393
+ {
1394
+ "epoch": 97.81,
1395
+ "learning_rate": 2.5153374233128836e-06,
1396
+ "loss": 0.7724,
1397
+ "step": 17900
1398
+ },
1399
+ {
1400
+ "epoch": 98.36,
1401
+ "learning_rate": 1.9018404907975462e-06,
1402
+ "loss": 0.7628,
1403
+ "step": 18000
1404
+ },
1405
+ {
1406
+ "epoch": 98.36,
1407
+ "eval_loss": 0.17734766006469727,
1408
+ "eval_runtime": 128.0092,
1409
+ "eval_samples_per_second": 20.467,
1410
+ "eval_steps_per_second": 2.562,
1411
+ "eval_wer": 0.2594225365759132,
1412
+ "step": 18000
1413
+ },
1414
+ {
1415
+ "epoch": 98.91,
1416
+ "learning_rate": 1.2883435582822088e-06,
1417
+ "loss": 0.7634,
1418
+ "step": 18100
1419
+ },
1420
+ {
1421
+ "epoch": 99.45,
1422
+ "learning_rate": 6.748466257668713e-07,
1423
+ "loss": 0.7638,
1424
+ "step": 18200
1425
+ },
1426
+ {
1427
+ "epoch": 100.0,
1428
+ "learning_rate": 6.134969325153375e-08,
1429
+ "loss": 0.7746,
1430
+ "step": 18300
1431
+ },
1432
+ {
1433
+ "epoch": 100.0,
1434
+ "step": 18300,
1435
+ "total_flos": 1.0288191185677785e+20,
1436
+ "train_loss": 1.1684310275218526,
1437
+ "train_runtime": 46836.9916,
1438
+ "train_samples_per_second": 12.501,
1439
+ "train_steps_per_second": 0.391
1440
  }
1441
  ],
1442
+ "max_steps": 18300,
1443
+ "num_train_epochs": 100,
1444
+ "total_flos": 1.0288191185677785e+20,
1445
  "trial_name": null,
1446
  "trial_params": null
1447
  }