lucio commited on
Commit
a658482
1 Parent(s): aafe559

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_loss": 0.24299392104148865,
4
- "eval_runtime": 142.3605,
5
  "eval_samples": 2744,
6
- "eval_samples_per_second": 19.275,
7
- "eval_steps_per_second": 2.409,
8
- "eval_wer": 0.3803796021038189,
9
- "train_loss": 1.5738848333155855,
10
- "train_runtime": 24806.553,
11
  "train_samples": 6034,
12
- "train_samples_per_second": 12.162,
13
- "train_steps_per_second": 0.379
14
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.22402019798755646,
4
+ "eval_runtime": 183.2761,
5
  "eval_samples": 2744,
6
+ "eval_samples_per_second": 14.972,
7
+ "eval_steps_per_second": 1.871,
8
+ "eval_wer": 0.3693335163075797,
9
+ "train_loss": 1.3463122907597969,
10
+ "train_runtime": 52469.8271,
11
  "train_samples": 6034,
12
+ "train_samples_per_second": 11.5,
13
+ "train_steps_per_second": 0.358
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 50.0,
3
- "eval_loss": 0.24299392104148865,
4
- "eval_runtime": 142.3605,
5
  "eval_samples": 2744,
6
- "eval_samples_per_second": 19.275,
7
- "eval_steps_per_second": 2.409,
8
- "eval_wer": 0.3803796021038189
9
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "eval_loss": 0.22402019798755646,
4
+ "eval_runtime": 183.2761,
5
  "eval_samples": 2744,
6
+ "eval_samples_per_second": 14.972,
7
+ "eval_steps_per_second": 1.871,
8
+ "eval_wer": 0.3693335163075797
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73b90be4276eb5faf73aa36222d89249b80ec66348e9433da7efe67d53c41e0d
3
  size 1262104049
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1116f3ee11641a58b4d05b479eab62bd54209a9260c5cda2e23d53a2bde8d25
3
  size 1262104049
runs/Jan28_17-40-32_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643391738.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.843454.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c05204e072a6a078c5880fef30fe2a2e48fea9f4c7ad611aec292154ed3f6db
3
- size 45634
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a387e304207b64e461be3f61d49b21fb78282fbf5573d36ade11293c8928528d
3
+ size 46474
runs/Jan28_17-40-32_job-0074bb36-c67f-4775-b1b6-176eb09b0ba4/events.out.tfevents.1643444394.job-0074bb36-c67f-4775-b1b6-176eb09b0ba4.843454.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e917187c31fe59b4ee2297eb96a7cd936adcb8107fca81be6d750744948ed068
3
+ size 364
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "train_loss": 1.5738848333155855,
4
- "train_runtime": 24806.553,
5
  "train_samples": 6034,
6
- "train_samples_per_second": 12.162,
7
- "train_steps_per_second": 0.379
8
  }
 
1
  {
2
+ "epoch": 100.0,
3
+ "train_loss": 1.3463122907597969,
4
+ "train_runtime": 52469.8271,
5
  "train_samples": 6034,
6
+ "train_samples_per_second": 11.5,
7
+ "train_steps_per_second": 0.358
8
  }
trainer_state.json CHANGED
@@ -1,751 +1,1486 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 49.99602649006623,
5
- "global_step": 9400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.53,
12
- "learning_rate": 3.7125e-06,
13
- "loss": 15.287,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.06,
18
- "learning_rate": 7.4625e-06,
19
- "loss": 7.6558,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 1.59,
24
- "learning_rate": 1.1212499999999998e-05,
25
- "loss": 4.9409,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 2.13,
30
- "learning_rate": 1.49625e-05,
31
- "loss": 4.2283,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 2.66,
36
- "learning_rate": 1.8712499999999997e-05,
37
- "loss": 3.6871,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 2.66,
42
- "eval_loss": 3.5374293327331543,
43
- "eval_runtime": 140.96,
44
- "eval_samples_per_second": 19.467,
45
- "eval_steps_per_second": 2.433,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 3.19,
51
- "learning_rate": 2.2462499999999997e-05,
52
- "loss": 3.4073,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 3.72,
57
- "learning_rate": 2.6212499999999997e-05,
58
- "loss": 3.2613,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 4.25,
63
- "learning_rate": 2.99625e-05,
64
- "loss": 3.2504,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 4.78,
69
- "learning_rate": 3.37125e-05,
70
- "loss": 3.1732,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 5.32,
75
- "learning_rate": 3.7462499999999996e-05,
76
- "loss": 3.1501,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 5.32,
81
- "eval_loss": 3.127795934677124,
82
- "eval_runtime": 140.7753,
83
- "eval_samples_per_second": 19.492,
84
- "eval_steps_per_second": 2.437,
85
  "eval_wer": 1.0,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 5.85,
90
- "learning_rate": 4.12125e-05,
91
- "loss": 3.0666,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 6.38,
96
- "learning_rate": 4.4962499999999995e-05,
97
- "loss": 2.7047,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 6.91,
102
- "learning_rate": 4.871249999999999e-05,
103
- "loss": 2.0895,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 7.45,
108
- "learning_rate": 5.2462499999999994e-05,
109
- "loss": 1.7459,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 7.97,
114
- "learning_rate": 5.62125e-05,
115
- "loss": 1.5843,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 7.97,
120
- "eval_loss": 0.6358404755592346,
121
- "eval_runtime": 138.8212,
122
- "eval_samples_per_second": 19.766,
123
- "eval_steps_per_second": 2.471,
124
- "eval_wer": 0.6914246512691516,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 8.51,
129
- "learning_rate": 5.9962499999999994e-05,
130
- "loss": 1.5166,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 9.04,
135
- "learning_rate": 6.37125e-05,
136
- "loss": 1.4555,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 9.57,
141
- "learning_rate": 6.746249999999999e-05,
142
- "loss": 1.4013,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 10.11,
147
- "learning_rate": 7.121249999999999e-05,
148
- "loss": 1.3841,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 10.64,
153
- "learning_rate": 7.49625e-05,
154
- "loss": 1.3378,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 10.64,
159
- "eval_loss": 0.442169189453125,
160
- "eval_runtime": 140.0078,
161
- "eval_samples_per_second": 19.599,
162
- "eval_steps_per_second": 2.45,
163
- "eval_wer": 0.5924536931168534,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 11.17,
168
- "learning_rate": 7.400675675675676e-05,
169
- "loss": 1.3249,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 11.7,
174
- "learning_rate": 7.299324324324324e-05,
175
- "loss": 1.2996,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 12.23,
180
- "learning_rate": 7.197972972972972e-05,
181
- "loss": 1.2952,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 12.76,
186
- "learning_rate": 7.096621621621621e-05,
187
- "loss": 1.2626,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 13.3,
192
- "learning_rate": 6.99527027027027e-05,
193
- "loss": 1.2595,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 13.3,
198
- "eval_loss": 0.3921487033367157,
199
- "eval_runtime": 140.1536,
200
- "eval_samples_per_second": 19.579,
201
- "eval_steps_per_second": 2.447,
202
- "eval_wer": 0.5511548136290876,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 13.83,
207
- "learning_rate": 6.893918918918919e-05,
208
- "loss": 1.2282,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 14.36,
213
- "learning_rate": 6.792567567567567e-05,
214
- "loss": 1.2352,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 14.89,
219
- "learning_rate": 6.691216216216216e-05,
220
- "loss": 1.2129,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 15.42,
225
- "learning_rate": 6.589864864864864e-05,
226
- "loss": 1.2184,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 15.95,
231
- "learning_rate": 6.488513513513514e-05,
232
- "loss": 1.1643,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 15.95,
237
- "eval_loss": 0.35074228048324585,
238
- "eval_runtime": 142.5452,
239
- "eval_samples_per_second": 19.25,
240
- "eval_steps_per_second": 2.406,
241
- "eval_wer": 0.514932540589984,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 16.49,
246
- "learning_rate": 6.388175675675675e-05,
247
- "loss": 1.1704,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 17.02,
252
- "learning_rate": 6.287837837837837e-05,
253
- "loss": 1.1774,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 17.55,
258
- "learning_rate": 6.186486486486485e-05,
259
- "loss": 1.157,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 18.08,
264
- "learning_rate": 6.085135135135135e-05,
265
- "loss": 1.1605,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 18.61,
270
- "learning_rate": 5.983783783783783e-05,
271
- "loss": 1.1352,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 18.61,
276
- "eval_loss": 0.3350585401058197,
277
- "eval_runtime": 140.4732,
278
- "eval_samples_per_second": 19.534,
279
- "eval_steps_per_second": 2.442,
280
- "eval_wer": 0.5018980105190944,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 19.15,
285
- "learning_rate": 5.882432432432432e-05,
286
- "loss": 1.1474,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 19.68,
291
- "learning_rate": 5.781081081081081e-05,
292
- "loss": 1.1273,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 20.21,
297
- "learning_rate": 5.679729729729729e-05,
298
- "loss": 1.133,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 20.74,
303
- "learning_rate": 5.578378378378378e-05,
304
- "loss": 1.1112,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 21.28,
309
- "learning_rate": 5.477027027027026e-05,
310
- "loss": 1.1113,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 21.28,
315
- "eval_loss": 0.3152759373188019,
316
- "eval_runtime": 139.3947,
317
- "eval_samples_per_second": 19.685,
318
- "eval_steps_per_second": 2.461,
319
- "eval_wer": 0.4845186370912417,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 21.81,
324
- "learning_rate": 5.375675675675675e-05,
325
- "loss": 1.1029,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 22.34,
330
- "learning_rate": 5.274324324324324e-05,
331
- "loss": 1.1124,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 22.87,
336
- "learning_rate": 5.172972972972972e-05,
337
- "loss": 1.091,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 23.4,
342
- "learning_rate": 5.071621621621621e-05,
343
- "loss": 1.097,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 23.93,
348
- "learning_rate": 4.97027027027027e-05,
349
- "loss": 1.0914,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 23.93,
354
- "eval_loss": 0.3050296902656555,
355
- "eval_runtime": 137.2603,
356
- "eval_samples_per_second": 19.991,
357
- "eval_steps_per_second": 2.499,
358
- "eval_wer": 0.4594100160073176,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 24.47,
363
- "learning_rate": 4.8689189189189184e-05,
364
- "loss": 1.0749,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 25.0,
369
- "learning_rate": 4.767567567567567e-05,
370
- "loss": 1.0807,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 25.53,
375
- "learning_rate": 4.666216216216216e-05,
376
- "loss": 1.0647,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 26.06,
381
- "learning_rate": 4.564864864864864e-05,
382
- "loss": 1.068,
383
  "step": 4900
384
  },
385
  {
386
  "epoch": 26.59,
387
- "learning_rate": 4.463513513513513e-05,
388
- "loss": 1.0468,
389
  "step": 5000
390
  },
391
  {
392
  "epoch": 26.59,
393
- "eval_loss": 0.2889558672904968,
394
- "eval_runtime": 137.6066,
395
- "eval_samples_per_second": 19.941,
396
- "eval_steps_per_second": 2.493,
397
- "eval_wer": 0.44701577864166475,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 27.13,
402
- "learning_rate": 4.3621621621621624e-05,
403
- "loss": 1.0548,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 27.66,
408
- "learning_rate": 4.26081081081081e-05,
409
- "loss": 1.0423,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 28.19,
414
- "learning_rate": 4.160472972972973e-05,
415
- "loss": 1.0436,
416
  "step": 5300
417
  },
418
  {
419
  "epoch": 28.72,
420
- "learning_rate": 4.059121621621621e-05,
421
- "loss": 1.0338,
422
  "step": 5400
423
  },
424
  {
425
  "epoch": 29.25,
426
- "learning_rate": 3.95777027027027e-05,
427
- "loss": 1.0473,
428
  "step": 5500
429
  },
430
  {
431
  "epoch": 29.25,
432
- "eval_loss": 0.27552109956741333,
433
- "eval_runtime": 139.7371,
434
- "eval_samples_per_second": 19.637,
435
- "eval_steps_per_second": 2.455,
436
- "eval_wer": 0.43306654470615136,
437
  "step": 5500
438
  },
439
  {
440
  "epoch": 29.78,
441
- "learning_rate": 3.856418918918919e-05,
442
- "loss": 1.027,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 30.32,
447
- "learning_rate": 3.755067567567568e-05,
448
- "loss": 1.0418,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 30.85,
453
- "learning_rate": 3.653716216216216e-05,
454
- "loss": 1.0147,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 31.38,
459
- "learning_rate": 3.552364864864865e-05,
460
- "loss": 1.0197,
461
  "step": 5900
462
  },
463
  {
464
  "epoch": 31.91,
465
- "learning_rate": 3.451013513513513e-05,
466
- "loss": 1.0065,
467
  "step": 6000
468
  },
469
  {
470
  "epoch": 31.91,
471
- "eval_loss": 0.27181389927864075,
472
- "eval_runtime": 138.4335,
473
- "eval_samples_per_second": 19.822,
474
- "eval_steps_per_second": 2.478,
475
- "eval_wer": 0.42638920649439743,
476
  "step": 6000
477
  },
478
  {
479
  "epoch": 32.45,
480
- "learning_rate": 3.3496621621621615e-05,
481
- "loss": 1.0138,
482
  "step": 6100
483
  },
484
  {
485
  "epoch": 32.97,
486
- "learning_rate": 3.2483108108108105e-05,
487
- "loss": 1.0027,
488
  "step": 6200
489
  },
490
  {
491
  "epoch": 33.51,
492
- "learning_rate": 3.146959459459459e-05,
493
- "loss": 1.0157,
494
  "step": 6300
495
  },
496
  {
497
  "epoch": 34.04,
498
- "learning_rate": 3.045608108108108e-05,
499
- "loss": 1.0034,
500
  "step": 6400
501
  },
502
  {
503
  "epoch": 34.57,
504
- "learning_rate": 2.9442567567567563e-05,
505
- "loss": 0.9794,
506
  "step": 6500
507
  },
508
  {
509
  "epoch": 34.57,
510
- "eval_loss": 0.2646064758300781,
511
- "eval_runtime": 138.3623,
512
- "eval_samples_per_second": 19.832,
513
- "eval_steps_per_second": 2.479,
514
- "eval_wer": 0.41934598673679396,
515
  "step": 6500
516
  },
517
  {
518
  "epoch": 35.11,
519
- "learning_rate": 2.8429054054054054e-05,
520
- "loss": 0.9911,
521
  "step": 6600
522
  },
523
  {
524
  "epoch": 35.64,
525
- "learning_rate": 2.7415540540540538e-05,
526
- "loss": 0.9807,
527
  "step": 6700
528
  },
529
  {
530
  "epoch": 36.17,
531
- "learning_rate": 2.6402027027027025e-05,
532
- "loss": 0.9843,
533
  "step": 6800
534
  },
535
  {
536
  "epoch": 36.7,
537
- "learning_rate": 2.538851351351351e-05,
538
- "loss": 0.9735,
539
  "step": 6900
540
  },
541
  {
542
  "epoch": 37.23,
543
- "learning_rate": 2.4375e-05,
544
- "loss": 0.9849,
545
  "step": 7000
546
  },
547
  {
548
  "epoch": 37.23,
549
- "eval_loss": 0.2609545886516571,
550
- "eval_runtime": 144.7382,
551
- "eval_samples_per_second": 18.958,
552
- "eval_steps_per_second": 2.37,
553
- "eval_wer": 0.4058083695403613,
554
  "step": 7000
555
  },
556
  {
557
  "epoch": 37.76,
558
- "learning_rate": 2.3361486486486483e-05,
559
- "loss": 0.983,
560
  "step": 7100
561
  },
562
  {
563
  "epoch": 38.3,
564
- "learning_rate": 2.234797297297297e-05,
565
- "loss": 0.9775,
566
  "step": 7200
567
  },
568
  {
569
  "epoch": 38.83,
570
- "learning_rate": 2.1334459459459458e-05,
571
- "loss": 0.9597,
572
  "step": 7300
573
  },
574
  {
575
  "epoch": 39.36,
576
- "learning_rate": 2.0320945945945945e-05,
577
- "loss": 0.9606,
578
  "step": 7400
579
  },
580
  {
581
  "epoch": 39.89,
582
- "learning_rate": 1.930743243243243e-05,
583
- "loss": 0.9496,
584
  "step": 7500
585
  },
586
  {
587
  "epoch": 39.89,
588
- "eval_loss": 0.2522386610507965,
589
- "eval_runtime": 139.7036,
590
- "eval_samples_per_second": 19.642,
591
- "eval_steps_per_second": 2.455,
592
- "eval_wer": 0.3984907386233707,
593
  "step": 7500
594
  },
595
  {
596
  "epoch": 40.42,
597
- "learning_rate": 1.829391891891892e-05,
598
- "loss": 0.9581,
599
  "step": 7600
600
  },
601
  {
602
  "epoch": 40.95,
603
- "learning_rate": 1.7280405405405403e-05,
604
- "loss": 0.9477,
605
  "step": 7700
606
  },
607
  {
608
  "epoch": 41.49,
609
- "learning_rate": 1.626689189189189e-05,
610
- "loss": 0.9538,
611
  "step": 7800
612
  },
613
  {
614
  "epoch": 42.02,
615
- "learning_rate": 1.5253378378378378e-05,
616
- "loss": 0.9631,
617
  "step": 7900
618
  },
619
  {
620
  "epoch": 42.55,
621
- "learning_rate": 1.4239864864864863e-05,
622
- "loss": 0.9367,
623
  "step": 8000
624
  },
625
  {
626
  "epoch": 42.55,
627
- "eval_loss": 0.25142449140548706,
628
- "eval_runtime": 138.6762,
629
- "eval_samples_per_second": 19.787,
630
- "eval_steps_per_second": 2.473,
631
- "eval_wer": 0.3946947175851818,
632
  "step": 8000
633
  },
634
  {
635
  "epoch": 43.08,
636
- "learning_rate": 1.322635135135135e-05,
637
- "loss": 0.9498,
638
  "step": 8100
639
  },
640
  {
641
  "epoch": 43.61,
642
- "learning_rate": 1.2212837837837838e-05,
643
- "loss": 0.9389,
644
  "step": 8200
645
  },
646
  {
647
  "epoch": 44.15,
648
- "learning_rate": 1.1199324324324323e-05,
649
- "loss": 0.937,
650
  "step": 8300
651
  },
652
  {
653
  "epoch": 44.68,
654
- "learning_rate": 1.0195945945945945e-05,
655
- "loss": 0.9394,
656
  "step": 8400
657
  },
658
  {
659
  "epoch": 45.21,
660
- "learning_rate": 9.182432432432432e-06,
661
- "loss": 0.9295,
662
  "step": 8500
663
  },
664
  {
665
  "epoch": 45.21,
666
- "eval_loss": 0.24582630395889282,
667
- "eval_runtime": 140.7119,
668
- "eval_samples_per_second": 19.501,
669
- "eval_steps_per_second": 2.438,
670
- "eval_wer": 0.3883375257260462,
671
  "step": 8500
672
  },
673
  {
674
  "epoch": 45.74,
675
- "learning_rate": 8.168918918918917e-06,
676
- "loss": 0.9244,
677
  "step": 8600
678
  },
679
  {
680
  "epoch": 46.28,
681
- "learning_rate": 7.1554054054054045e-06,
682
- "loss": 0.9273,
683
  "step": 8700
684
  },
685
  {
686
  "epoch": 46.81,
687
- "learning_rate": 6.141891891891891e-06,
688
- "loss": 0.9229,
689
  "step": 8800
690
  },
691
  {
692
  "epoch": 47.34,
693
- "learning_rate": 5.128378378378377e-06,
694
- "loss": 0.9392,
695
  "step": 8900
696
  },
697
  {
698
  "epoch": 47.87,
699
- "learning_rate": 4.1148648648648645e-06,
700
- "loss": 0.9187,
701
  "step": 9000
702
  },
703
  {
704
  "epoch": 47.87,
705
- "eval_loss": 0.24390903115272522,
706
- "eval_runtime": 139.6323,
707
- "eval_samples_per_second": 19.652,
708
- "eval_steps_per_second": 2.456,
709
- "eval_wer": 0.3832609192773839,
710
  "step": 9000
711
  },
712
  {
713
  "epoch": 48.4,
714
- "learning_rate": 3.1013513513513513e-06,
715
- "loss": 0.9194,
716
  "step": 9100
717
  },
718
  {
719
  "epoch": 48.93,
720
- "learning_rate": 2.0878378378378376e-06,
721
- "loss": 0.9234,
722
  "step": 9200
723
  },
724
  {
725
  "epoch": 49.47,
726
- "learning_rate": 1.0743243243243242e-06,
727
- "loss": 0.9257,
728
  "step": 9300
729
  },
730
  {
731
  "epoch": 50.0,
732
- "learning_rate": 6.08108108108108e-08,
733
- "loss": 0.9097,
734
  "step": 9400
735
  },
736
  {
737
- "epoch": 50.0,
738
- "step": 9400,
739
- "total_flos": 5.3948756860365595e+19,
740
- "train_loss": 1.5738848333155855,
741
- "train_runtime": 24806.553,
742
- "train_samples_per_second": 12.162,
743
- "train_steps_per_second": 0.379
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
744
  }
745
  ],
746
- "max_steps": 9400,
747
- "num_train_epochs": 50,
748
- "total_flos": 5.3948756860365595e+19,
749
  "trial_name": null,
750
  "trial_params": null
751
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 99.99602649006623,
5
+ "global_step": 18800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.53,
12
+ "learning_rate": 2.0000000000000003e-06,
13
+ "loss": 15.5103,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.06,
18
+ "learning_rate": 3.980000000000001e-06,
19
+ "loss": 10.25,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 1.59,
24
+ "learning_rate": 5.98e-06,
25
+ "loss": 5.588,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 2.13,
30
+ "learning_rate": 7.980000000000002e-06,
31
+ "loss": 4.6387,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 2.66,
36
+ "learning_rate": 9.980000000000001e-06,
37
+ "loss": 4.1169,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 2.66,
42
+ "eval_loss": 4.01455020904541,
43
+ "eval_runtime": 162.7143,
44
+ "eval_samples_per_second": 16.864,
45
+ "eval_steps_per_second": 2.108,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 3.19,
51
+ "learning_rate": 1.198e-05,
52
+ "loss": 3.796,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 3.72,
57
+ "learning_rate": 1.398e-05,
58
+ "loss": 3.4906,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 4.25,
63
+ "learning_rate": 1.5980000000000003e-05,
64
+ "loss": 3.3596,
65
  "step": 800
66
  },
67
  {
68
  "epoch": 4.78,
69
+ "learning_rate": 1.798e-05,
70
+ "loss": 3.2609,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 5.32,
75
+ "learning_rate": 1.9980000000000002e-05,
76
+ "loss": 3.2512,
77
  "step": 1000
78
  },
79
  {
80
  "epoch": 5.32,
81
+ "eval_loss": 3.234210252761841,
82
+ "eval_runtime": 160.5159,
83
+ "eval_samples_per_second": 17.095,
84
+ "eval_steps_per_second": 2.137,
85
  "eval_wer": 1.0,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 5.85,
90
+ "learning_rate": 2.1980000000000003e-05,
91
+ "loss": 3.1927,
92
  "step": 1100
93
  },
94
  {
95
  "epoch": 6.38,
96
+ "learning_rate": 2.3980000000000004e-05,
97
+ "loss": 3.1722,
98
  "step": 1200
99
  },
100
  {
101
  "epoch": 6.91,
102
+ "learning_rate": 2.5980000000000002e-05,
103
+ "loss": 3.1008,
104
  "step": 1300
105
  },
106
  {
107
  "epoch": 7.45,
108
+ "learning_rate": 2.7980000000000003e-05,
109
+ "loss": 2.9719,
110
  "step": 1400
111
  },
112
  {
113
  "epoch": 7.97,
114
+ "learning_rate": 2.9980000000000004e-05,
115
+ "loss": 2.5435,
116
  "step": 1500
117
  },
118
  {
119
  "epoch": 7.97,
120
+ "eval_loss": 1.8155322074890137,
121
+ "eval_runtime": 146.5621,
122
+ "eval_samples_per_second": 18.722,
123
+ "eval_steps_per_second": 2.34,
124
+ "eval_wer": 1.0286354695576598,
125
  "step": 1500
126
  },
127
  {
128
  "epoch": 8.51,
129
+ "learning_rate": 3.198e-05,
130
+ "loss": 2.1032,
131
  "step": 1600
132
  },
133
  {
134
  "epoch": 9.04,
135
+ "learning_rate": 3.398e-05,
136
+ "loss": 1.8413,
137
  "step": 1700
138
  },
139
  {
140
  "epoch": 9.57,
141
+ "learning_rate": 3.5980000000000004e-05,
142
+ "loss": 1.7079,
143
  "step": 1800
144
  },
145
  {
146
  "epoch": 10.11,
147
+ "learning_rate": 3.7980000000000006e-05,
148
+ "loss": 1.6319,
149
  "step": 1900
150
  },
151
  {
152
  "epoch": 10.64,
153
+ "learning_rate": 3.998000000000001e-05,
154
+ "loss": 1.5575,
155
  "step": 2000
156
  },
157
  {
158
  "epoch": 10.64,
159
+ "eval_loss": 0.6345986127853394,
160
+ "eval_runtime": 149.5836,
161
+ "eval_samples_per_second": 18.344,
162
+ "eval_steps_per_second": 2.293,
163
+ "eval_wer": 0.705777411829285,
164
  "step": 2000
165
  },
166
  {
167
  "epoch": 11.17,
168
+ "learning_rate": 3.976428571428572e-05,
169
+ "loss": 1.5137,
170
  "step": 2100
171
  },
172
  {
173
  "epoch": 11.7,
174
+ "learning_rate": 3.952857142857143e-05,
175
+ "loss": 1.468,
176
  "step": 2200
177
  },
178
  {
179
  "epoch": 12.23,
180
+ "learning_rate": 3.9290476190476196e-05,
181
+ "loss": 1.4546,
182
  "step": 2300
183
  },
184
  {
185
  "epoch": 12.76,
186
+ "learning_rate": 3.905238095238096e-05,
187
+ "loss": 1.4071,
188
  "step": 2400
189
  },
190
  {
191
  "epoch": 13.3,
192
+ "learning_rate": 3.881428571428572e-05,
193
+ "loss": 1.3979,
194
  "step": 2500
195
  },
196
  {
197
  "epoch": 13.3,
198
+ "eval_loss": 0.48850002884864807,
199
+ "eval_runtime": 146.6274,
200
+ "eval_samples_per_second": 18.714,
201
+ "eval_steps_per_second": 2.339,
202
+ "eval_wer": 0.6320387905402315,
203
  "step": 2500
204
  },
205
  {
206
  "epoch": 13.83,
207
+ "learning_rate": 3.857857142857143e-05,
208
+ "loss": 1.3532,
209
  "step": 2600
210
  },
211
  {
212
  "epoch": 14.36,
213
+ "learning_rate": 3.834047619047619e-05,
214
+ "loss": 1.3544,
215
  "step": 2700
216
  },
217
  {
218
  "epoch": 14.89,
219
+ "learning_rate": 3.810238095238096e-05,
220
+ "loss": 1.335,
221
  "step": 2800
222
  },
223
  {
224
  "epoch": 15.42,
225
+ "learning_rate": 3.7864285714285715e-05,
226
+ "loss": 1.3402,
227
  "step": 2900
228
  },
229
  {
230
  "epoch": 15.95,
231
+ "learning_rate": 3.762619047619048e-05,
232
+ "loss": 1.2874,
233
  "step": 3000
234
  },
235
  {
236
  "epoch": 15.95,
237
+ "eval_loss": 0.4270566701889038,
238
+ "eval_runtime": 150.2121,
239
+ "eval_samples_per_second": 18.268,
240
+ "eval_steps_per_second": 2.283,
241
+ "eval_wer": 0.6088468048122226,
242
  "step": 3000
243
  },
244
  {
245
  "epoch": 16.49,
246
+ "learning_rate": 3.7388095238095244e-05,
247
+ "loss": 1.2897,
248
  "step": 3100
249
  },
250
  {
251
  "epoch": 17.02,
252
+ "learning_rate": 3.715e-05,
253
+ "loss": 1.2965,
254
  "step": 3200
255
  },
256
  {
257
  "epoch": 17.55,
258
+ "learning_rate": 3.6911904761904766e-05,
259
+ "loss": 1.2752,
260
  "step": 3300
261
  },
262
  {
263
  "epoch": 18.08,
264
+ "learning_rate": 3.6673809523809524e-05,
265
+ "loss": 1.2672,
266
  "step": 3400
267
  },
268
  {
269
  "epoch": 18.61,
270
+ "learning_rate": 3.643571428571429e-05,
271
+ "loss": 1.2383,
272
  "step": 3500
273
  },
274
  {
275
  "epoch": 18.61,
276
+ "eval_loss": 0.3889118432998657,
277
+ "eval_runtime": 149.102,
278
+ "eval_samples_per_second": 18.404,
279
+ "eval_steps_per_second": 2.3,
280
+ "eval_wer": 0.586889895247244,
281
  "step": 3500
282
  },
283
  {
284
  "epoch": 19.15,
285
+ "learning_rate": 3.619761904761905e-05,
286
+ "loss": 1.2479,
287
  "step": 3600
288
  },
289
  {
290
  "epoch": 19.68,
291
+ "learning_rate": 3.595952380952381e-05,
292
+ "loss": 1.2281,
293
  "step": 3700
294
  },
295
  {
296
  "epoch": 20.21,
297
+ "learning_rate": 3.5721428571428575e-05,
298
+ "loss": 1.2223,
299
  "step": 3800
300
  },
301
  {
302
  "epoch": 20.74,
303
+ "learning_rate": 3.548333333333333e-05,
304
+ "loss": 1.2006,
305
  "step": 3900
306
  },
307
  {
308
  "epoch": 21.28,
309
+ "learning_rate": 3.52452380952381e-05,
310
+ "loss": 1.2054,
311
  "step": 4000
312
  },
313
  {
314
  "epoch": 21.28,
315
+ "eval_loss": 0.3609465956687927,
316
+ "eval_runtime": 157.7853,
317
+ "eval_samples_per_second": 17.391,
318
+ "eval_steps_per_second": 2.174,
319
+ "eval_wer": 0.5792964640226889,
320
  "step": 4000
321
  },
322
  {
323
  "epoch": 21.81,
324
+ "learning_rate": 3.500714285714286e-05,
325
+ "loss": 1.1953,
326
  "step": 4100
327
  },
328
  {
329
  "epoch": 22.34,
330
+ "learning_rate": 3.476904761904762e-05,
331
+ "loss": 1.2022,
332
  "step": 4200
333
  },
334
  {
335
  "epoch": 22.87,
336
+ "learning_rate": 3.4530952380952384e-05,
337
+ "loss": 1.1841,
338
  "step": 4300
339
  },
340
  {
341
  "epoch": 23.4,
342
+ "learning_rate": 3.429285714285715e-05,
343
+ "loss": 1.1941,
344
  "step": 4400
345
  },
346
  {
347
  "epoch": 23.93,
348
+ "learning_rate": 3.4054761904761906e-05,
349
+ "loss": 1.1866,
350
  "step": 4500
351
  },
352
  {
353
  "epoch": 23.93,
354
+ "eval_loss": 0.3450409471988678,
355
+ "eval_runtime": 147.8034,
356
+ "eval_samples_per_second": 18.565,
357
+ "eval_steps_per_second": 2.321,
358
+ "eval_wer": 0.5513471478889347,
359
  "step": 4500
360
  },
361
  {
362
  "epoch": 24.47,
363
+ "learning_rate": 3.381666666666667e-05,
364
+ "loss": 1.1688,
365
  "step": 4600
366
  },
367
  {
368
  "epoch": 25.0,
369
+ "learning_rate": 3.357857142857143e-05,
370
+ "loss": 1.1772,
371
  "step": 4700
372
  },
373
  {
374
  "epoch": 25.53,
375
+ "learning_rate": 3.3342857142857146e-05,
376
+ "loss": 1.1641,
377
  "step": 4800
378
  },
379
  {
380
  "epoch": 26.06,
381
+ "learning_rate": 3.3104761904761904e-05,
382
+ "loss": 1.1567,
383
  "step": 4900
384
  },
385
  {
386
  "epoch": 26.59,
387
+ "learning_rate": 3.286666666666667e-05,
388
+ "loss": 1.1332,
389
  "step": 5000
390
  },
391
  {
392
  "epoch": 26.59,
393
+ "eval_loss": 0.32143130898475647,
394
+ "eval_runtime": 151.6356,
395
+ "eval_samples_per_second": 18.096,
396
+ "eval_steps_per_second": 2.262,
397
+ "eval_wer": 0.5378985407803851,
398
  "step": 5000
399
  },
400
  {
401
  "epoch": 27.13,
402
+ "learning_rate": 3.262857142857143e-05,
403
+ "loss": 1.1436,
404
  "step": 5100
405
  },
406
  {
407
  "epoch": 27.66,
408
+ "learning_rate": 3.23904761904762e-05,
409
+ "loss": 1.1234,
410
  "step": 5200
411
  },
412
  {
413
  "epoch": 28.19,
414
+ "learning_rate": 3.215476190476191e-05,
415
+ "loss": 1.1273,
416
  "step": 5300
417
  },
418
  {
419
  "epoch": 28.72,
420
+ "learning_rate": 3.191666666666667e-05,
421
+ "loss": 1.1237,
422
  "step": 5400
423
  },
424
  {
425
  "epoch": 29.25,
426
+ "learning_rate": 3.167857142857143e-05,
427
+ "loss": 1.135,
428
  "step": 5500
429
  },
430
  {
431
  "epoch": 29.25,
432
+ "eval_loss": 0.3122180998325348,
433
+ "eval_runtime": 157.9044,
434
+ "eval_samples_per_second": 17.378,
435
+ "eval_steps_per_second": 2.172,
436
+ "eval_wer": 0.538401719957916,
437
  "step": 5500
438
  },
439
  {
440
  "epoch": 29.78,
441
+ "learning_rate": 3.1440476190476194e-05,
442
+ "loss": 1.1127,
443
  "step": 5600
444
  },
445
  {
446
  "epoch": 30.32,
447
+ "learning_rate": 3.120238095238095e-05,
448
+ "loss": 1.1288,
449
  "step": 5700
450
  },
451
  {
452
  "epoch": 30.85,
453
+ "learning_rate": 3.096428571428572e-05,
454
+ "loss": 1.11,
455
  "step": 5800
456
  },
457
  {
458
  "epoch": 31.38,
459
+ "learning_rate": 3.072619047619048e-05,
460
+ "loss": 1.1098,
461
  "step": 5900
462
  },
463
  {
464
  "epoch": 31.91,
465
+ "learning_rate": 3.048809523809524e-05,
466
+ "loss": 1.0992,
467
  "step": 6000
468
  },
469
  {
470
  "epoch": 31.91,
471
+ "eval_loss": 0.29478520154953003,
472
+ "eval_runtime": 149.1488,
473
+ "eval_samples_per_second": 18.398,
474
+ "eval_steps_per_second": 2.3,
475
+ "eval_wer": 0.5078450208133205,
476
  "step": 6000
477
  },
478
  {
479
  "epoch": 32.45,
480
+ "learning_rate": 3.025e-05,
481
+ "loss": 1.1043,
482
  "step": 6100
483
  },
484
  {
485
  "epoch": 32.97,
486
+ "learning_rate": 3.0011904761904765e-05,
487
+ "loss": 1.0963,
488
  "step": 6200
489
  },
490
  {
491
  "epoch": 33.51,
492
+ "learning_rate": 2.9773809523809526e-05,
493
+ "loss": 1.1059,
494
  "step": 6300
495
  },
496
  {
497
  "epoch": 34.04,
498
+ "learning_rate": 2.953571428571429e-05,
499
+ "loss": 1.095,
500
  "step": 6400
501
  },
502
  {
503
  "epoch": 34.57,
504
+ "learning_rate": 2.9297619047619048e-05,
505
+ "loss": 1.0707,
506
  "step": 6500
507
  },
508
  {
509
  "epoch": 34.57,
510
+ "eval_loss": 0.2927539348602295,
511
+ "eval_runtime": 153.3467,
512
+ "eval_samples_per_second": 17.894,
513
+ "eval_steps_per_second": 2.237,
514
+ "eval_wer": 0.5127853254654408,
515
  "step": 6500
516
  },
517
  {
518
  "epoch": 35.11,
519
+ "learning_rate": 2.9059523809523812e-05,
520
+ "loss": 1.0887,
521
  "step": 6600
522
  },
523
  {
524
  "epoch": 35.64,
525
+ "learning_rate": 2.8821428571428574e-05,
526
+ "loss": 1.0763,
527
  "step": 6700
528
  },
529
  {
530
  "epoch": 36.17,
531
+ "learning_rate": 2.8583333333333335e-05,
532
+ "loss": 1.0805,
533
  "step": 6800
534
  },
535
  {
536
  "epoch": 36.7,
537
+ "learning_rate": 2.83452380952381e-05,
538
+ "loss": 1.0675,
539
  "step": 6900
540
  },
541
  {
542
  "epoch": 37.23,
543
+ "learning_rate": 2.8107142857142857e-05,
544
+ "loss": 1.0754,
545
  "step": 7000
546
  },
547
  {
548
  "epoch": 37.23,
549
+ "eval_loss": 0.28569138050079346,
550
+ "eval_runtime": 156.3448,
551
+ "eval_samples_per_second": 17.551,
552
+ "eval_steps_per_second": 2.194,
553
+ "eval_wer": 0.5016696399981703,
554
  "step": 7000
555
  },
556
  {
557
  "epoch": 37.76,
558
+ "learning_rate": 2.786904761904762e-05,
559
+ "loss": 1.0695,
560
  "step": 7100
561
  },
562
  {
563
  "epoch": 38.3,
564
+ "learning_rate": 2.7630952380952383e-05,
565
+ "loss": 1.0747,
566
  "step": 7200
567
  },
568
  {
569
  "epoch": 38.83,
570
+ "learning_rate": 2.7392857142857147e-05,
571
+ "loss": 1.0551,
572
  "step": 7300
573
  },
574
  {
575
  "epoch": 39.36,
576
+ "learning_rate": 2.715714285714286e-05,
577
+ "loss": 1.0511,
578
  "step": 7400
579
  },
580
  {
581
  "epoch": 39.89,
582
+ "learning_rate": 2.6919047619047622e-05,
583
+ "loss": 1.0461,
584
  "step": 7500
585
  },
586
  {
587
  "epoch": 39.89,
588
+ "eval_loss": 0.27913743257522583,
589
+ "eval_runtime": 146.6834,
590
+ "eval_samples_per_second": 18.707,
591
+ "eval_steps_per_second": 2.338,
592
+ "eval_wer": 0.509949224646631,
593
  "step": 7500
594
  },
595
  {
596
  "epoch": 40.42,
597
+ "learning_rate": 2.6680952380952387e-05,
598
+ "loss": 1.0547,
599
  "step": 7600
600
  },
601
  {
602
  "epoch": 40.95,
603
+ "learning_rate": 2.6442857142857144e-05,
604
+ "loss": 1.0443,
605
  "step": 7700
606
  },
607
  {
608
  "epoch": 41.49,
609
+ "learning_rate": 2.6204761904761905e-05,
610
+ "loss": 1.0582,
611
  "step": 7800
612
  },
613
  {
614
  "epoch": 42.02,
615
+ "learning_rate": 2.596666666666667e-05,
616
+ "loss": 1.0588,
617
  "step": 7900
618
  },
619
  {
620
  "epoch": 42.55,
621
+ "learning_rate": 2.572857142857143e-05,
622
+ "loss": 1.0328,
623
  "step": 8000
624
  },
625
  {
626
  "epoch": 42.55,
627
+ "eval_loss": 0.2728850841522217,
628
+ "eval_runtime": 150.8954,
629
+ "eval_samples_per_second": 18.185,
630
+ "eval_steps_per_second": 2.273,
631
+ "eval_wer": 0.511961941356754,
632
  "step": 8000
633
  },
634
  {
635
  "epoch": 43.08,
636
+ "learning_rate": 2.5490476190476196e-05,
637
+ "loss": 1.0464,
638
  "step": 8100
639
  },
640
  {
641
  "epoch": 43.61,
642
+ "learning_rate": 2.5252380952380953e-05,
643
+ "loss": 1.0289,
644
  "step": 8200
645
  },
646
  {
647
  "epoch": 44.15,
648
+ "learning_rate": 2.5014285714285714e-05,
649
+ "loss": 1.026,
650
  "step": 8300
651
  },
652
  {
653
  "epoch": 44.68,
654
+ "learning_rate": 2.477619047619048e-05,
655
+ "loss": 1.0288,
656
  "step": 8400
657
  },
658
  {
659
  "epoch": 45.21,
660
+ "learning_rate": 2.453809523809524e-05,
661
+ "loss": 1.0201,
662
  "step": 8500
663
  },
664
  {
665
  "epoch": 45.21,
666
+ "eval_loss": 0.2654191255569458,
667
+ "eval_runtime": 150.9056,
668
+ "eval_samples_per_second": 18.184,
669
+ "eval_steps_per_second": 2.273,
670
+ "eval_wer": 0.47198206852385527,
671
  "step": 8500
672
  },
673
  {
674
  "epoch": 45.74,
675
+ "learning_rate": 2.4300000000000005e-05,
676
+ "loss": 1.0167,
677
  "step": 8600
678
  },
679
  {
680
  "epoch": 46.28,
681
+ "learning_rate": 2.4061904761904762e-05,
682
+ "loss": 1.0183,
683
  "step": 8700
684
  },
685
  {
686
  "epoch": 46.81,
687
+ "learning_rate": 2.3823809523809523e-05,
688
+ "loss": 1.0093,
689
  "step": 8800
690
  },
691
  {
692
  "epoch": 47.34,
693
+ "learning_rate": 2.3585714285714288e-05,
694
+ "loss": 1.0236,
695
  "step": 8900
696
  },
697
  {
698
  "epoch": 47.87,
699
+ "learning_rate": 2.334761904761905e-05,
700
+ "loss": 1.0035,
701
  "step": 9000
702
  },
703
  {
704
  "epoch": 47.87,
705
+ "eval_loss": 0.26225191354751587,
706
+ "eval_runtime": 146.6476,
707
+ "eval_samples_per_second": 18.712,
708
+ "eval_steps_per_second": 2.339,
709
+ "eval_wer": 0.4658981748318924,
710
  "step": 9000
711
  },
712
  {
713
  "epoch": 48.4,
714
+ "learning_rate": 2.3109523809523813e-05,
715
+ "loss": 1.0059,
716
  "step": 9100
717
  },
718
  {
719
  "epoch": 48.93,
720
+ "learning_rate": 2.287142857142857e-05,
721
+ "loss": 1.0031,
722
  "step": 9200
723
  },
724
  {
725
  "epoch": 49.47,
726
+ "learning_rate": 2.2633333333333332e-05,
727
+ "loss": 1.0092,
728
  "step": 9300
729
  },
730
  {
731
  "epoch": 50.0,
732
+ "learning_rate": 2.2395238095238097e-05,
733
+ "loss": 0.9908,
734
  "step": 9400
735
  },
736
  {
737
+ "epoch": 50.53,
738
+ "learning_rate": 2.215952380952381e-05,
739
+ "loss": 1.0069,
740
+ "step": 9500
741
+ },
742
+ {
743
+ "epoch": 50.53,
744
+ "eval_loss": 0.25685444474220276,
745
+ "eval_runtime": 152.7548,
746
+ "eval_samples_per_second": 17.963,
747
+ "eval_steps_per_second": 2.245,
748
+ "eval_wer": 0.45931110196239877,
749
+ "step": 9500
750
+ },
751
+ {
752
+ "epoch": 51.06,
753
+ "learning_rate": 2.1921428571428572e-05,
754
+ "loss": 1.0056,
755
+ "step": 9600
756
+ },
757
+ {
758
+ "epoch": 51.59,
759
+ "learning_rate": 2.1683333333333336e-05,
760
+ "loss": 0.9943,
761
+ "step": 9700
762
+ },
763
+ {
764
+ "epoch": 52.13,
765
+ "learning_rate": 2.1445238095238097e-05,
766
+ "loss": 0.9985,
767
+ "step": 9800
768
+ },
769
+ {
770
+ "epoch": 52.66,
771
+ "learning_rate": 2.120714285714286e-05,
772
+ "loss": 0.9858,
773
+ "step": 9900
774
+ },
775
+ {
776
+ "epoch": 53.19,
777
+ "learning_rate": 2.096904761904762e-05,
778
+ "loss": 0.9998,
779
+ "step": 10000
780
+ },
781
+ {
782
+ "epoch": 53.19,
783
+ "eval_loss": 0.251886785030365,
784
+ "eval_runtime": 152.7789,
785
+ "eval_samples_per_second": 17.961,
786
+ "eval_steps_per_second": 2.245,
787
+ "eval_wer": 0.44051049814738574,
788
+ "step": 10000
789
+ },
790
+ {
791
+ "epoch": 53.72,
792
+ "learning_rate": 2.0730952380952384e-05,
793
+ "loss": 0.9829,
794
+ "step": 10100
795
+ },
796
+ {
797
+ "epoch": 54.25,
798
+ "learning_rate": 2.0492857142857145e-05,
799
+ "loss": 0.9944,
800
+ "step": 10200
801
+ },
802
+ {
803
+ "epoch": 54.78,
804
+ "learning_rate": 2.025476190476191e-05,
805
+ "loss": 0.9777,
806
+ "step": 10300
807
+ },
808
+ {
809
+ "epoch": 55.32,
810
+ "learning_rate": 2.0019047619047624e-05,
811
+ "loss": 0.9828,
812
+ "step": 10400
813
+ },
814
+ {
815
+ "epoch": 55.85,
816
+ "learning_rate": 1.978095238095238e-05,
817
+ "loss": 0.9762,
818
+ "step": 10500
819
+ },
820
+ {
821
+ "epoch": 55.85,
822
+ "eval_loss": 0.2504919767379761,
823
+ "eval_runtime": 153.619,
824
+ "eval_samples_per_second": 17.862,
825
+ "eval_steps_per_second": 2.233,
826
+ "eval_wer": 0.45876217922327434,
827
+ "step": 10500
828
+ },
829
+ {
830
+ "epoch": 56.38,
831
+ "learning_rate": 1.9542857142857143e-05,
832
+ "loss": 0.9843,
833
+ "step": 10600
834
+ },
835
+ {
836
+ "epoch": 56.91,
837
+ "learning_rate": 1.9304761904761907e-05,
838
+ "loss": 0.9686,
839
+ "step": 10700
840
+ },
841
+ {
842
+ "epoch": 57.45,
843
+ "learning_rate": 1.9066666666666668e-05,
844
+ "loss": 0.9776,
845
+ "step": 10800
846
+ },
847
+ {
848
+ "epoch": 57.97,
849
+ "learning_rate": 1.882857142857143e-05,
850
+ "loss": 0.9743,
851
+ "step": 10900
852
+ },
853
+ {
854
+ "epoch": 58.51,
855
+ "learning_rate": 1.859047619047619e-05,
856
+ "loss": 0.9755,
857
+ "step": 11000
858
+ },
859
+ {
860
+ "epoch": 58.51,
861
+ "eval_loss": 0.2478867620229721,
862
+ "eval_runtime": 157.921,
863
+ "eval_samples_per_second": 17.376,
864
+ "eval_steps_per_second": 2.172,
865
+ "eval_wer": 0.4563835140204016,
866
+ "step": 11000
867
+ },
868
+ {
869
+ "epoch": 59.04,
870
+ "learning_rate": 1.8352380952380955e-05,
871
+ "loss": 0.9598,
872
+ "step": 11100
873
+ },
874
+ {
875
+ "epoch": 59.57,
876
+ "learning_rate": 1.8114285714285716e-05,
877
+ "loss": 0.9621,
878
+ "step": 11200
879
+ },
880
+ {
881
+ "epoch": 60.11,
882
+ "learning_rate": 1.7876190476190477e-05,
883
+ "loss": 0.9711,
884
+ "step": 11300
885
+ },
886
+ {
887
+ "epoch": 60.64,
888
+ "learning_rate": 1.7638095238095238e-05,
889
+ "loss": 0.9584,
890
+ "step": 11400
891
+ },
892
+ {
893
+ "epoch": 61.17,
894
+ "learning_rate": 1.7400000000000003e-05,
895
+ "loss": 0.9624,
896
+ "step": 11500
897
+ },
898
+ {
899
+ "epoch": 61.17,
900
+ "eval_loss": 0.24601028859615326,
901
+ "eval_runtime": 151.7998,
902
+ "eval_samples_per_second": 18.076,
903
+ "eval_steps_per_second": 2.26,
904
+ "eval_wer": 0.42976076117286494,
905
+ "step": 11500
906
+ },
907
+ {
908
+ "epoch": 61.7,
909
+ "learning_rate": 1.7161904761904764e-05,
910
+ "loss": 0.9489,
911
+ "step": 11600
912
+ },
913
+ {
914
+ "epoch": 62.23,
915
+ "learning_rate": 1.6923809523809525e-05,
916
+ "loss": 0.9563,
917
+ "step": 11700
918
+ },
919
+ {
920
+ "epoch": 62.76,
921
+ "learning_rate": 1.6685714285714286e-05,
922
+ "loss": 0.9483,
923
+ "step": 11800
924
+ },
925
+ {
926
+ "epoch": 63.3,
927
+ "learning_rate": 1.644761904761905e-05,
928
+ "loss": 0.9703,
929
+ "step": 11900
930
+ },
931
+ {
932
+ "epoch": 63.83,
933
+ "learning_rate": 1.6209523809523812e-05,
934
+ "loss": 0.9494,
935
+ "step": 12000
936
+ },
937
+ {
938
+ "epoch": 63.83,
939
+ "eval_loss": 0.2402362823486328,
940
+ "eval_runtime": 160.0311,
941
+ "eval_samples_per_second": 17.147,
942
+ "eval_steps_per_second": 2.143,
943
+ "eval_wer": 0.41823338365125107,
944
+ "step": 12000
945
+ },
946
+ {
947
+ "epoch": 64.36,
948
+ "learning_rate": 1.5971428571428573e-05,
949
+ "loss": 0.9528,
950
+ "step": 12100
951
+ },
952
+ {
953
+ "epoch": 64.89,
954
+ "learning_rate": 1.5733333333333334e-05,
955
+ "loss": 0.9422,
956
+ "step": 12200
957
+ },
958
+ {
959
+ "epoch": 65.42,
960
+ "learning_rate": 1.5497619047619048e-05,
961
+ "loss": 0.9481,
962
+ "step": 12300
963
+ },
964
+ {
965
+ "epoch": 65.95,
966
+ "learning_rate": 1.5259523809523812e-05,
967
+ "loss": 0.9464,
968
+ "step": 12400
969
+ },
970
+ {
971
+ "epoch": 66.49,
972
+ "learning_rate": 1.5021428571428574e-05,
973
+ "loss": 0.948,
974
+ "step": 12500
975
+ },
976
+ {
977
+ "epoch": 66.49,
978
+ "eval_loss": 0.2412397861480713,
979
+ "eval_runtime": 153.0299,
980
+ "eval_samples_per_second": 17.931,
981
+ "eval_steps_per_second": 2.241,
982
+ "eval_wer": 0.4211609715932482,
983
+ "step": 12500
984
+ },
985
+ {
986
+ "epoch": 67.02,
987
+ "learning_rate": 1.4783333333333335e-05,
988
+ "loss": 0.9524,
989
+ "step": 12600
990
+ },
991
+ {
992
+ "epoch": 67.55,
993
+ "learning_rate": 1.4545238095238097e-05,
994
+ "loss": 0.9389,
995
+ "step": 12700
996
+ },
997
+ {
998
+ "epoch": 68.08,
999
+ "learning_rate": 1.4307142857142857e-05,
1000
+ "loss": 0.936,
1001
+ "step": 12800
1002
+ },
1003
+ {
1004
+ "epoch": 68.61,
1005
+ "learning_rate": 1.406904761904762e-05,
1006
+ "loss": 0.9284,
1007
+ "step": 12900
1008
+ },
1009
+ {
1010
+ "epoch": 69.15,
1011
+ "learning_rate": 1.3830952380952383e-05,
1012
+ "loss": 0.9312,
1013
+ "step": 13000
1014
+ },
1015
+ {
1016
+ "epoch": 69.15,
1017
+ "eval_loss": 0.2352364957332611,
1018
+ "eval_runtime": 155.983,
1019
+ "eval_samples_per_second": 17.592,
1020
+ "eval_steps_per_second": 2.199,
1021
+ "eval_wer": 0.39700837107177167,
1022
+ "step": 13000
1023
+ },
1024
+ {
1025
+ "epoch": 69.68,
1026
+ "learning_rate": 1.3592857142857144e-05,
1027
+ "loss": 0.9291,
1028
+ "step": 13100
1029
+ },
1030
+ {
1031
+ "epoch": 70.21,
1032
+ "learning_rate": 1.3354761904761906e-05,
1033
+ "loss": 0.9319,
1034
+ "step": 13200
1035
+ },
1036
+ {
1037
+ "epoch": 70.74,
1038
+ "learning_rate": 1.311666666666667e-05,
1039
+ "loss": 0.9168,
1040
+ "step": 13300
1041
+ },
1042
+ {
1043
+ "epoch": 71.28,
1044
+ "learning_rate": 1.2878571428571429e-05,
1045
+ "loss": 0.9359,
1046
+ "step": 13400
1047
+ },
1048
+ {
1049
+ "epoch": 71.81,
1050
+ "learning_rate": 1.2640476190476192e-05,
1051
+ "loss": 0.9172,
1052
+ "step": 13500
1053
+ },
1054
+ {
1055
+ "epoch": 71.81,
1056
+ "eval_loss": 0.23573005199432373,
1057
+ "eval_runtime": 152.9039,
1058
+ "eval_samples_per_second": 17.946,
1059
+ "eval_steps_per_second": 2.243,
1060
+ "eval_wer": 0.3926169891587759,
1061
+ "step": 13500
1062
+ },
1063
+ {
1064
+ "epoch": 72.34,
1065
+ "learning_rate": 1.2402380952380953e-05,
1066
+ "loss": 0.938,
1067
+ "step": 13600
1068
+ },
1069
+ {
1070
+ "epoch": 72.87,
1071
+ "learning_rate": 1.2164285714285715e-05,
1072
+ "loss": 0.9146,
1073
+ "step": 13700
1074
+ },
1075
+ {
1076
+ "epoch": 73.4,
1077
+ "learning_rate": 1.1926190476190478e-05,
1078
+ "loss": 0.9253,
1079
+ "step": 13800
1080
+ },
1081
+ {
1082
+ "epoch": 73.93,
1083
+ "learning_rate": 1.1688095238095238e-05,
1084
+ "loss": 0.9192,
1085
+ "step": 13900
1086
+ },
1087
+ {
1088
+ "epoch": 74.47,
1089
+ "learning_rate": 1.145e-05,
1090
+ "loss": 0.9101,
1091
+ "step": 14000
1092
+ },
1093
+ {
1094
+ "epoch": 74.47,
1095
+ "eval_loss": 0.23052847385406494,
1096
+ "eval_runtime": 144.6085,
1097
+ "eval_samples_per_second": 18.975,
1098
+ "eval_steps_per_second": 2.372,
1099
+ "eval_wer": 0.39046704176387176,
1100
+ "step": 14000
1101
+ },
1102
+ {
1103
+ "epoch": 75.0,
1104
+ "learning_rate": 1.1211904761904763e-05,
1105
+ "loss": 0.9204,
1106
+ "step": 14100
1107
+ },
1108
+ {
1109
+ "epoch": 75.53,
1110
+ "learning_rate": 1.0973809523809524e-05,
1111
+ "loss": 0.9244,
1112
+ "step": 14200
1113
+ },
1114
+ {
1115
+ "epoch": 76.06,
1116
+ "learning_rate": 1.0735714285714287e-05,
1117
+ "loss": 0.9251,
1118
+ "step": 14300
1119
+ },
1120
+ {
1121
+ "epoch": 76.59,
1122
+ "learning_rate": 1.0497619047619048e-05,
1123
+ "loss": 0.9149,
1124
+ "step": 14400
1125
+ },
1126
+ {
1127
+ "epoch": 77.13,
1128
+ "learning_rate": 1.0259523809523811e-05,
1129
+ "loss": 0.9177,
1130
+ "step": 14500
1131
+ },
1132
+ {
1133
+ "epoch": 77.13,
1134
+ "eval_loss": 0.23065772652626038,
1135
+ "eval_runtime": 154.0486,
1136
+ "eval_samples_per_second": 17.813,
1137
+ "eval_steps_per_second": 2.227,
1138
+ "eval_wer": 0.3837884817711907,
1139
+ "step": 14500
1140
+ },
1141
+ {
1142
+ "epoch": 77.66,
1143
+ "learning_rate": 1.0021428571428572e-05,
1144
+ "loss": 0.902,
1145
+ "step": 14600
1146
+ },
1147
+ {
1148
+ "epoch": 78.19,
1149
+ "learning_rate": 9.783333333333335e-06,
1150
+ "loss": 0.9127,
1151
+ "step": 14700
1152
+ },
1153
+ {
1154
+ "epoch": 78.72,
1155
+ "learning_rate": 9.545238095238096e-06,
1156
+ "loss": 0.9078,
1157
+ "step": 14800
1158
+ },
1159
+ {
1160
+ "epoch": 79.25,
1161
+ "learning_rate": 9.307142857142857e-06,
1162
+ "loss": 0.9066,
1163
+ "step": 14900
1164
+ },
1165
+ {
1166
+ "epoch": 79.78,
1167
+ "learning_rate": 9.06904761904762e-06,
1168
+ "loss": 0.9083,
1169
+ "step": 15000
1170
+ },
1171
+ {
1172
+ "epoch": 79.78,
1173
+ "eval_loss": 0.2312641590833664,
1174
+ "eval_runtime": 159.1357,
1175
+ "eval_samples_per_second": 17.243,
1176
+ "eval_steps_per_second": 2.155,
1177
+ "eval_wer": 0.3799917661589131,
1178
+ "step": 15000
1179
+ },
1180
+ {
1181
+ "epoch": 80.32,
1182
+ "learning_rate": 8.830952380952381e-06,
1183
+ "loss": 0.9123,
1184
+ "step": 15100
1185
+ },
1186
+ {
1187
+ "epoch": 80.85,
1188
+ "learning_rate": 8.592857142857144e-06,
1189
+ "loss": 0.902,
1190
+ "step": 15200
1191
+ },
1192
+ {
1193
+ "epoch": 81.38,
1194
+ "learning_rate": 8.354761904761905e-06,
1195
+ "loss": 0.9114,
1196
+ "step": 15300
1197
+ },
1198
+ {
1199
+ "epoch": 81.91,
1200
+ "learning_rate": 8.116666666666666e-06,
1201
+ "loss": 0.8936,
1202
+ "step": 15400
1203
+ },
1204
+ {
1205
+ "epoch": 82.45,
1206
+ "learning_rate": 7.878571428571429e-06,
1207
+ "loss": 0.9068,
1208
+ "step": 15500
1209
+ },
1210
+ {
1211
+ "epoch": 82.45,
1212
+ "eval_loss": 0.22753386199474335,
1213
+ "eval_runtime": 149.5268,
1214
+ "eval_samples_per_second": 18.351,
1215
+ "eval_steps_per_second": 2.294,
1216
+ "eval_wer": 0.3742280773981062,
1217
+ "step": 15500
1218
+ },
1219
+ {
1220
+ "epoch": 82.97,
1221
+ "learning_rate": 7.640476190476192e-06,
1222
+ "loss": 0.8974,
1223
+ "step": 15600
1224
+ },
1225
+ {
1226
+ "epoch": 83.51,
1227
+ "learning_rate": 7.402380952380953e-06,
1228
+ "loss": 0.9025,
1229
+ "step": 15700
1230
+ },
1231
+ {
1232
+ "epoch": 84.04,
1233
+ "learning_rate": 7.164285714285715e-06,
1234
+ "loss": 0.9008,
1235
+ "step": 15800
1236
+ },
1237
+ {
1238
+ "epoch": 84.57,
1239
+ "learning_rate": 6.926190476190476e-06,
1240
+ "loss": 0.8975,
1241
+ "step": 15900
1242
+ },
1243
+ {
1244
+ "epoch": 85.11,
1245
+ "learning_rate": 6.688095238095239e-06,
1246
+ "loss": 0.9087,
1247
+ "step": 16000
1248
+ },
1249
+ {
1250
+ "epoch": 85.11,
1251
+ "eval_loss": 0.22831058502197266,
1252
+ "eval_runtime": 151.3152,
1253
+ "eval_samples_per_second": 18.134,
1254
+ "eval_steps_per_second": 2.267,
1255
+ "eval_wer": 0.3746855130140433,
1256
+ "step": 16000
1257
+ },
1258
+ {
1259
+ "epoch": 85.64,
1260
+ "learning_rate": 6.450000000000001e-06,
1261
+ "loss": 0.8852,
1262
+ "step": 16100
1263
+ },
1264
+ {
1265
+ "epoch": 86.17,
1266
+ "learning_rate": 6.211904761904762e-06,
1267
+ "loss": 0.8967,
1268
+ "step": 16200
1269
+ },
1270
+ {
1271
+ "epoch": 86.7,
1272
+ "learning_rate": 5.973809523809524e-06,
1273
+ "loss": 0.8862,
1274
+ "step": 16300
1275
+ },
1276
+ {
1277
+ "epoch": 87.23,
1278
+ "learning_rate": 5.735714285714287e-06,
1279
+ "loss": 0.9042,
1280
+ "step": 16400
1281
+ },
1282
+ {
1283
+ "epoch": 87.76,
1284
+ "learning_rate": 5.497619047619048e-06,
1285
+ "loss": 0.8838,
1286
+ "step": 16500
1287
+ },
1288
+ {
1289
+ "epoch": 87.76,
1290
+ "eval_loss": 0.2285744994878769,
1291
+ "eval_runtime": 157.9121,
1292
+ "eval_samples_per_second": 17.377,
1293
+ "eval_steps_per_second": 2.172,
1294
+ "eval_wer": 0.37770458807922785,
1295
+ "step": 16500
1296
+ },
1297
+ {
1298
+ "epoch": 88.3,
1299
+ "learning_rate": 5.25952380952381e-06,
1300
+ "loss": 0.8952,
1301
+ "step": 16600
1302
+ },
1303
+ {
1304
+ "epoch": 88.83,
1305
+ "learning_rate": 5.021428571428572e-06,
1306
+ "loss": 0.8893,
1307
+ "step": 16700
1308
+ },
1309
+ {
1310
+ "epoch": 89.36,
1311
+ "learning_rate": 4.783333333333334e-06,
1312
+ "loss": 0.8933,
1313
+ "step": 16800
1314
+ },
1315
+ {
1316
+ "epoch": 89.89,
1317
+ "learning_rate": 4.545238095238095e-06,
1318
+ "loss": 0.8796,
1319
+ "step": 16900
1320
+ },
1321
+ {
1322
+ "epoch": 90.42,
1323
+ "learning_rate": 4.307142857142858e-06,
1324
+ "loss": 0.8868,
1325
+ "step": 17000
1326
+ },
1327
+ {
1328
+ "epoch": 90.42,
1329
+ "eval_loss": 0.22693119943141937,
1330
+ "eval_runtime": 145.6646,
1331
+ "eval_samples_per_second": 18.838,
1332
+ "eval_steps_per_second": 2.355,
1333
+ "eval_wer": 0.37216961712638946,
1334
+ "step": 17000
1335
+ },
1336
+ {
1337
+ "epoch": 90.95,
1338
+ "learning_rate": 4.069047619047619e-06,
1339
+ "loss": 0.8852,
1340
+ "step": 17100
1341
+ },
1342
+ {
1343
+ "epoch": 91.49,
1344
+ "learning_rate": 3.830952380952382e-06,
1345
+ "loss": 0.8895,
1346
+ "step": 17200
1347
+ },
1348
+ {
1349
+ "epoch": 92.02,
1350
+ "learning_rate": 3.592857142857143e-06,
1351
+ "loss": 0.8858,
1352
+ "step": 17300
1353
+ },
1354
+ {
1355
+ "epoch": 92.55,
1356
+ "learning_rate": 3.3547619047619052e-06,
1357
+ "loss": 0.8852,
1358
+ "step": 17400
1359
+ },
1360
+ {
1361
+ "epoch": 93.08,
1362
+ "learning_rate": 3.1166666666666668e-06,
1363
+ "loss": 0.8895,
1364
+ "step": 17500
1365
+ },
1366
+ {
1367
+ "epoch": 93.08,
1368
+ "eval_loss": 0.22457998991012573,
1369
+ "eval_runtime": 151.9978,
1370
+ "eval_samples_per_second": 18.053,
1371
+ "eval_steps_per_second": 2.257,
1372
+ "eval_wer": 0.37143772014089016,
1373
+ "step": 17500
1374
+ },
1375
+ {
1376
+ "epoch": 93.61,
1377
+ "learning_rate": 2.8785714285714287e-06,
1378
+ "loss": 0.8835,
1379
+ "step": 17600
1380
+ },
1381
+ {
1382
+ "epoch": 94.15,
1383
+ "learning_rate": 2.6404761904761907e-06,
1384
+ "loss": 0.8846,
1385
+ "step": 17700
1386
+ },
1387
+ {
1388
+ "epoch": 94.68,
1389
+ "learning_rate": 2.4023809523809527e-06,
1390
+ "loss": 0.8826,
1391
+ "step": 17800
1392
+ },
1393
+ {
1394
+ "epoch": 95.21,
1395
+ "learning_rate": 2.1642857142857146e-06,
1396
+ "loss": 0.8916,
1397
+ "step": 17900
1398
+ },
1399
+ {
1400
+ "epoch": 95.74,
1401
+ "learning_rate": 1.926190476190476e-06,
1402
+ "loss": 0.8926,
1403
+ "step": 18000
1404
+ },
1405
+ {
1406
+ "epoch": 95.74,
1407
+ "eval_loss": 0.2240794152021408,
1408
+ "eval_runtime": 144.8784,
1409
+ "eval_samples_per_second": 18.94,
1410
+ "eval_steps_per_second": 2.368,
1411
+ "eval_wer": 0.37052284890901604,
1412
+ "step": 18000
1413
+ },
1414
+ {
1415
+ "epoch": 96.28,
1416
+ "learning_rate": 1.6880952380952381e-06,
1417
+ "loss": 0.8885,
1418
+ "step": 18100
1419
+ },
1420
+ {
1421
+ "epoch": 96.81,
1422
+ "learning_rate": 1.45e-06,
1423
+ "loss": 0.8728,
1424
+ "step": 18200
1425
+ },
1426
+ {
1427
+ "epoch": 97.34,
1428
+ "learning_rate": 1.211904761904762e-06,
1429
+ "loss": 0.887,
1430
+ "step": 18300
1431
+ },
1432
+ {
1433
+ "epoch": 97.87,
1434
+ "learning_rate": 9.73809523809524e-07,
1435
+ "loss": 0.8809,
1436
+ "step": 18400
1437
+ },
1438
+ {
1439
+ "epoch": 98.4,
1440
+ "learning_rate": 7.404761904761905e-07,
1441
+ "loss": 0.8856,
1442
+ "step": 18500
1443
+ },
1444
+ {
1445
+ "epoch": 98.4,
1446
+ "eval_loss": 0.22416575253009796,
1447
+ "eval_runtime": 184.721,
1448
+ "eval_samples_per_second": 14.855,
1449
+ "eval_steps_per_second": 1.857,
1450
+ "eval_wer": 0.3693335163075797,
1451
+ "step": 18500
1452
+ },
1453
+ {
1454
+ "epoch": 98.93,
1455
+ "learning_rate": 5.023809523809524e-07,
1456
+ "loss": 0.8738,
1457
+ "step": 18600
1458
+ },
1459
+ {
1460
+ "epoch": 99.47,
1461
+ "learning_rate": 2.6428571428571433e-07,
1462
+ "loss": 0.8831,
1463
+ "step": 18700
1464
+ },
1465
+ {
1466
+ "epoch": 100.0,
1467
+ "learning_rate": 2.619047619047619e-08,
1468
+ "loss": 0.877,
1469
+ "step": 18800
1470
+ },
1471
+ {
1472
+ "epoch": 100.0,
1473
+ "step": 18800,
1474
+ "total_flos": 1.0790071156798875e+20,
1475
+ "train_loss": 1.3463122907597969,
1476
+ "train_runtime": 52469.8271,
1477
+ "train_samples_per_second": 11.5,
1478
+ "train_steps_per_second": 0.358
1479
  }
1480
  ],
1481
+ "max_steps": 18800,
1482
+ "num_train_epochs": 100,
1483
+ "total_flos": 1.0790071156798875e+20,
1484
  "trial_name": null,
1485
  "trial_params": null
1486
  }