lucio commited on
Commit
233c538
1 Parent(s): 399d0d4

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 100.0,
3
- "eval_loss": 0.21627500653266907,
4
- "eval_runtime": 133.3587,
5
- "eval_samples": 2742,
6
- "eval_samples_per_second": 20.561,
7
- "eval_steps_per_second": 2.572,
8
- "eval_wer": 0.32487632188081517,
9
- "train_loss": 1.3660302423273476,
10
- "train_runtime": 26867.6077,
11
- "train_samples": 3292,
12
- "train_samples_per_second": 12.253,
13
- "train_steps_per_second": 0.383
14
  }
 
1
  {
2
+ "epoch": 99.99,
3
+ "eval_loss": 0.20258904993534088,
4
+ "eval_runtime": 135.1965,
5
+ "eval_samples": 2744,
6
+ "eval_samples_per_second": 20.296,
7
+ "eval_steps_per_second": 2.537,
8
+ "eval_wer": 0.3247906274312388,
9
+ "train_loss": 1.3946523244330225,
10
+ "train_runtime": 41836.5581,
11
+ "train_samples": 6034,
12
+ "train_samples_per_second": 14.423,
13
+ "train_steps_per_second": 0.225
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 100.0,
3
- "eval_loss": 0.21627500653266907,
4
- "eval_runtime": 133.3587,
5
- "eval_samples": 2742,
6
- "eval_samples_per_second": 20.561,
7
- "eval_steps_per_second": 2.572,
8
- "eval_wer": 0.32487632188081517
9
  }
 
1
  {
2
+ "epoch": 99.99,
3
+ "eval_loss": 0.20258904993534088,
4
+ "eval_runtime": 135.1965,
5
+ "eval_samples": 2744,
6
+ "eval_samples_per_second": 20.296,
7
+ "eval_steps_per_second": 2.537,
8
+ "eval_wer": 0.3247906274312388
9
  }
runs/Feb03_17-57-48_job-699ba53c-fea9-4eb2-81af-a97f440eaa45/events.out.tfevents.1643953286.job-699ba53c-fea9-4eb2-81af-a97f440eaa45.861419.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc6d3bd6de783de478dded36396b456f500e13b482db6f63358a0c954a46424d
3
+ size 358
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 100.0,
3
- "train_loss": 1.3660302423273476,
4
- "train_runtime": 26867.6077,
5
- "train_samples": 3292,
6
- "train_samples_per_second": 12.253,
7
- "train_steps_per_second": 0.383
8
  }
 
1
  {
2
+ "epoch": 99.99,
3
+ "train_loss": 1.3946523244330225,
4
+ "train_runtime": 41836.5581,
5
+ "train_samples": 6034,
6
+ "train_samples_per_second": 14.423,
7
+ "train_steps_per_second": 0.225
8
  }
trainer_state.json CHANGED
@@ -1,823 +1,751 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 100.0,
5
- "global_step": 10300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.97,
12
  "learning_rate": 4.950000000000001e-06,
13
- "loss": 13.4586,
14
  "step": 100
15
  },
16
  {
17
- "epoch": 1.94,
18
  "learning_rate": 9.950000000000001e-06,
19
- "loss": 5.8722,
20
  "step": 200
21
  },
22
  {
23
- "epoch": 2.91,
24
  "learning_rate": 1.4950000000000001e-05,
25
- "loss": 4.0954,
26
  "step": 300
27
  },
28
  {
29
- "epoch": 3.88,
30
  "learning_rate": 1.995e-05,
31
- "loss": 3.572,
32
  "step": 400
33
  },
34
  {
35
- "epoch": 4.85,
36
  "learning_rate": 2.495e-05,
37
- "loss": 3.2914,
38
  "step": 500
39
  },
40
  {
41
- "epoch": 4.85,
42
- "eval_loss": 3.2282841205596924,
43
- "eval_runtime": 135.7815,
44
- "eval_samples_per_second": 20.194,
45
- "eval_steps_per_second": 2.526,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
- "epoch": 5.83,
51
  "learning_rate": 2.995e-05,
52
- "loss": 3.2087,
53
  "step": 600
54
  },
55
  {
56
- "epoch": 6.8,
57
  "learning_rate": 3.495e-05,
58
- "loss": 3.1802,
59
  "step": 700
60
  },
61
  {
62
- "epoch": 7.77,
63
  "learning_rate": 3.995e-05,
64
- "loss": 3.1307,
65
  "step": 800
66
  },
67
  {
68
- "epoch": 8.74,
69
  "learning_rate": 4.495e-05,
70
- "loss": 3.0922,
71
  "step": 900
72
  },
73
  {
74
- "epoch": 9.71,
75
  "learning_rate": 4.995e-05,
76
- "loss": 3.0068,
77
  "step": 1000
78
  },
79
  {
80
- "epoch": 9.71,
81
- "eval_loss": 2.7939300537109375,
82
- "eval_runtime": 134.6432,
83
- "eval_samples_per_second": 20.365,
84
- "eval_steps_per_second": 2.547,
85
- "eval_wer": 0.997957609040984,
86
  "step": 1000
87
  },
88
  {
89
- "epoch": 10.68,
90
  "learning_rate": 5.495e-05,
91
- "loss": 2.5073,
92
  "step": 1100
93
  },
94
  {
95
- "epoch": 11.65,
96
  "learning_rate": 5.995000000000001e-05,
97
- "loss": 1.8326,
98
  "step": 1200
99
  },
100
  {
101
- "epoch": 12.62,
102
  "learning_rate": 6.494999999999999e-05,
103
- "loss": 1.598,
104
  "step": 1300
105
  },
106
  {
107
- "epoch": 13.59,
108
  "learning_rate": 6.995e-05,
109
- "loss": 1.5016,
110
  "step": 1400
111
  },
112
  {
113
- "epoch": 14.56,
114
  "learning_rate": 7.495e-05,
115
- "loss": 1.4306,
116
  "step": 1500
117
  },
118
  {
119
- "epoch": 14.56,
120
- "eval_loss": 0.48574715852737427,
121
- "eval_runtime": 133.5239,
122
- "eval_samples_per_second": 20.536,
123
- "eval_steps_per_second": 2.569,
124
- "eval_wer": 0.6313711251304861,
125
  "step": 1500
126
  },
127
  {
128
- "epoch": 15.53,
129
  "learning_rate": 7.995e-05,
130
- "loss": 1.3756,
131
  "step": 1600
132
  },
133
  {
134
- "epoch": 16.5,
135
  "learning_rate": 8.495e-05,
136
- "loss": 1.3583,
137
  "step": 1700
138
  },
139
  {
140
- "epoch": 17.48,
141
  "learning_rate": 8.995e-05,
142
- "loss": 1.3058,
143
  "step": 1800
144
  },
145
  {
146
- "epoch": 18.45,
147
  "learning_rate": 9.495e-05,
148
- "loss": 1.2949,
149
  "step": 1900
150
  },
151
  {
152
- "epoch": 19.42,
153
- "learning_rate": 9.995e-05,
154
- "loss": 1.2831,
155
  "step": 2000
156
  },
157
  {
158
- "epoch": 19.42,
159
- "eval_loss": 0.3678707182407379,
160
- "eval_runtime": 134.1491,
161
- "eval_samples_per_second": 20.44,
162
- "eval_steps_per_second": 2.557,
163
- "eval_wer": 0.6065901148277584,
164
  "step": 2000
165
  },
166
  {
167
- "epoch": 20.39,
168
- "learning_rate": 9.880722891566265e-05,
169
- "loss": 1.2725,
170
  "step": 2100
171
  },
172
  {
173
- "epoch": 21.36,
174
- "learning_rate": 9.76144578313253e-05,
175
- "loss": 1.2436,
176
  "step": 2200
177
  },
178
  {
179
- "epoch": 22.33,
180
- "learning_rate": 9.640963855421687e-05,
181
- "loss": 1.2363,
182
  "step": 2300
183
  },
184
  {
185
- "epoch": 23.3,
186
- "learning_rate": 9.521686746987952e-05,
187
- "loss": 1.2243,
188
  "step": 2400
189
  },
190
  {
191
- "epoch": 24.27,
192
- "learning_rate": 9.402409638554217e-05,
193
- "loss": 1.2065,
194
  "step": 2500
195
  },
196
  {
197
- "epoch": 24.27,
198
- "eval_loss": 0.33028003573417664,
199
- "eval_runtime": 134.2277,
200
- "eval_samples_per_second": 20.428,
201
- "eval_steps_per_second": 2.555,
202
- "eval_wer": 0.5559842055099169,
203
  "step": 2500
204
  },
205
  {
206
- "epoch": 25.24,
207
- "learning_rate": 9.281927710843374e-05,
208
- "loss": 1.192,
209
  "step": 2600
210
  },
211
  {
212
- "epoch": 26.21,
213
- "learning_rate": 9.161445783132531e-05,
214
- "loss": 1.1816,
215
  "step": 2700
216
  },
217
  {
218
- "epoch": 27.18,
219
- "learning_rate": 9.040963855421686e-05,
220
- "loss": 1.1869,
221
  "step": 2800
222
  },
223
  {
224
- "epoch": 28.16,
225
- "learning_rate": 8.920481927710844e-05,
226
- "loss": 1.1728,
227
  "step": 2900
228
  },
229
  {
230
- "epoch": 29.13,
231
- "learning_rate": 8.800000000000001e-05,
232
- "loss": 1.1449,
233
  "step": 3000
234
  },
235
  {
236
- "epoch": 29.13,
237
- "eval_loss": 0.3007894456386566,
238
- "eval_runtime": 133.5503,
239
- "eval_samples_per_second": 20.532,
240
- "eval_steps_per_second": 2.568,
241
- "eval_wer": 0.46902373712159035,
242
  "step": 3000
243
  },
244
  {
245
- "epoch": 30.1,
246
- "learning_rate": 8.679518072289157e-05,
247
- "loss": 1.1408,
248
  "step": 3100
249
  },
250
  {
251
- "epoch": 31.07,
252
- "learning_rate": 8.559036144578315e-05,
253
- "loss": 1.1319,
254
  "step": 3200
255
  },
256
  {
257
- "epoch": 32.04,
258
- "learning_rate": 8.43855421686747e-05,
259
- "loss": 1.1178,
260
  "step": 3300
261
  },
262
  {
263
- "epoch": 33.01,
264
- "learning_rate": 8.318072289156627e-05,
265
- "loss": 1.1122,
266
  "step": 3400
267
  },
268
  {
269
- "epoch": 33.98,
270
- "learning_rate": 8.197590361445784e-05,
271
- "loss": 1.0926,
272
  "step": 3500
273
  },
274
  {
275
- "epoch": 33.98,
276
- "eval_loss": 0.28173714876174927,
277
- "eval_runtime": 132.429,
278
- "eval_samples_per_second": 20.705,
279
- "eval_steps_per_second": 2.59,
280
- "eval_wer": 0.4618980619979122,
281
  "step": 3500
282
  },
283
  {
284
- "epoch": 34.95,
285
- "learning_rate": 8.07710843373494e-05,
286
- "loss": 1.0935,
287
  "step": 3600
288
  },
289
  {
290
- "epoch": 35.92,
291
- "learning_rate": 7.956626506024096e-05,
292
- "loss": 1.0815,
293
  "step": 3700
294
  },
295
  {
296
- "epoch": 36.89,
297
- "learning_rate": 7.836144578313254e-05,
298
- "loss": 1.0856,
299
  "step": 3800
300
  },
301
  {
302
- "epoch": 37.86,
303
- "learning_rate": 7.71566265060241e-05,
304
- "loss": 1.0732,
305
  "step": 3900
306
  },
307
  {
308
- "epoch": 38.83,
309
- "learning_rate": 7.595180722891566e-05,
310
- "loss": 1.0635,
311
  "step": 4000
312
  },
313
  {
314
- "epoch": 38.83,
315
- "eval_loss": 0.2665168046951294,
316
- "eval_runtime": 133.7977,
317
- "eval_samples_per_second": 20.494,
318
- "eval_steps_per_second": 2.564,
319
- "eval_wer": 0.4391140561884446,
320
  "step": 4000
321
  },
322
  {
323
- "epoch": 39.81,
324
- "learning_rate": 7.474698795180723e-05,
325
- "loss": 1.0614,
326
  "step": 4100
327
  },
328
  {
329
- "epoch": 40.78,
330
- "learning_rate": 7.35421686746988e-05,
331
- "loss": 1.0457,
332
  "step": 4200
333
  },
334
  {
335
- "epoch": 41.75,
336
- "learning_rate": 7.233734939759036e-05,
337
- "loss": 1.039,
338
  "step": 4300
339
  },
340
  {
341
- "epoch": 42.72,
342
- "learning_rate": 7.113253012048193e-05,
343
- "loss": 1.0151,
344
  "step": 4400
345
  },
346
  {
347
- "epoch": 43.69,
348
- "learning_rate": 6.99277108433735e-05,
349
- "loss": 1.029,
350
  "step": 4500
351
  },
352
  {
353
- "epoch": 43.69,
354
- "eval_loss": 0.26156488060951233,
355
- "eval_runtime": 133.7699,
356
- "eval_samples_per_second": 20.498,
357
- "eval_steps_per_second": 2.564,
358
- "eval_wer": 0.4175100984886307,
359
  "step": 4500
360
  },
361
  {
362
- "epoch": 44.66,
363
- "learning_rate": 6.873493975903614e-05,
364
- "loss": 1.0254,
365
  "step": 4600
366
  },
367
  {
368
- "epoch": 45.63,
369
- "learning_rate": 6.753012048192771e-05,
370
- "loss": 1.0328,
371
  "step": 4700
372
  },
373
  {
374
- "epoch": 46.6,
375
- "learning_rate": 6.632530120481928e-05,
376
- "loss": 1.022,
377
  "step": 4800
378
  },
379
  {
380
- "epoch": 47.57,
381
- "learning_rate": 6.512048192771085e-05,
382
- "loss": 1.0021,
383
  "step": 4900
384
  },
385
  {
386
- "epoch": 48.54,
387
- "learning_rate": 6.391566265060241e-05,
388
- "loss": 1.0064,
389
  "step": 5000
390
  },
391
  {
392
- "epoch": 48.54,
393
- "eval_loss": 0.24684669077396393,
394
- "eval_runtime": 133.5,
395
- "eval_samples_per_second": 20.539,
396
- "eval_steps_per_second": 2.569,
397
- "eval_wer": 0.4051195933372668,
398
  "step": 5000
399
  },
400
  {
401
- "epoch": 49.51,
402
- "learning_rate": 6.271084337349398e-05,
403
- "loss": 0.9791,
404
  "step": 5100
405
  },
406
  {
407
- "epoch": 50.49,
408
- "learning_rate": 6.150602409638555e-05,
409
- "loss": 0.9722,
410
  "step": 5200
411
  },
412
  {
413
- "epoch": 51.46,
414
- "learning_rate": 6.030120481927711e-05,
415
- "loss": 0.9815,
416
  "step": 5300
417
  },
418
  {
419
- "epoch": 52.43,
420
- "learning_rate": 5.909638554216868e-05,
421
- "loss": 0.9633,
422
  "step": 5400
423
  },
424
  {
425
- "epoch": 53.4,
426
- "learning_rate": 5.789156626506025e-05,
427
- "loss": 0.9659,
428
  "step": 5500
429
  },
430
  {
431
- "epoch": 53.4,
432
- "eval_loss": 0.2394031286239624,
433
- "eval_runtime": 133.1725,
434
- "eval_samples_per_second": 20.59,
435
- "eval_steps_per_second": 2.576,
436
- "eval_wer": 0.38596650478827216,
437
  "step": 5500
438
  },
439
  {
440
- "epoch": 54.37,
441
- "learning_rate": 5.668674698795181e-05,
442
- "loss": 0.9544,
443
  "step": 5600
444
  },
445
  {
446
- "epoch": 55.34,
447
- "learning_rate": 5.5481927710843374e-05,
448
- "loss": 0.9581,
449
  "step": 5700
450
  },
451
  {
452
- "epoch": 56.31,
453
- "learning_rate": 5.427710843373495e-05,
454
- "loss": 0.9437,
455
  "step": 5800
456
  },
457
  {
458
- "epoch": 57.28,
459
- "learning_rate": 5.307228915662651e-05,
460
- "loss": 0.9378,
461
  "step": 5900
462
  },
463
  {
464
- "epoch": 58.25,
465
- "learning_rate": 5.186746987951807e-05,
466
- "loss": 0.9254,
467
  "step": 6000
468
  },
469
  {
470
- "epoch": 58.25,
471
- "eval_loss": 0.2373155653476715,
472
- "eval_runtime": 133.3175,
473
- "eval_samples_per_second": 20.567,
474
- "eval_steps_per_second": 2.573,
475
- "eval_wer": 0.3688558071982935,
476
  "step": 6000
477
  },
478
  {
479
- "epoch": 59.22,
480
- "learning_rate": 5.0662650602409644e-05,
481
- "loss": 0.9321,
482
  "step": 6100
483
  },
484
  {
485
- "epoch": 60.19,
486
- "learning_rate": 4.9457831325301205e-05,
487
- "loss": 0.9122,
488
  "step": 6200
489
  },
490
  {
491
- "epoch": 61.17,
492
- "learning_rate": 4.825301204819277e-05,
493
- "loss": 0.9148,
494
  "step": 6300
495
  },
496
  {
497
- "epoch": 62.14,
498
- "learning_rate": 4.704819277108434e-05,
499
- "loss": 0.9177,
500
  "step": 6400
501
  },
502
  {
503
- "epoch": 63.11,
504
- "learning_rate": 4.584337349397591e-05,
505
- "loss": 0.9209,
506
  "step": 6500
507
  },
508
  {
509
- "epoch": 63.11,
510
- "eval_loss": 0.23466718196868896,
511
- "eval_runtime": 134.3014,
512
- "eval_samples_per_second": 20.417,
513
- "eval_steps_per_second": 2.554,
514
- "eval_wer": 0.367040348568057,
515
  "step": 6500
516
  },
517
  {
518
- "epoch": 64.08,
519
- "learning_rate": 4.4638554216867476e-05,
520
- "loss": 0.8981,
521
  "step": 6600
522
  },
523
  {
524
- "epoch": 65.05,
525
- "learning_rate": 4.344578313253012e-05,
526
- "loss": 0.8927,
527
  "step": 6700
528
  },
529
  {
530
- "epoch": 66.02,
531
- "learning_rate": 4.224096385542169e-05,
532
- "loss": 0.8986,
533
  "step": 6800
534
  },
535
  {
536
- "epoch": 66.99,
537
- "learning_rate": 4.1036144578313255e-05,
538
- "loss": 0.8867,
539
  "step": 6900
540
  },
541
  {
542
- "epoch": 67.96,
543
- "learning_rate": 3.983132530120482e-05,
544
- "loss": 0.889,
545
  "step": 7000
546
  },
547
  {
548
- "epoch": 67.96,
549
- "eval_loss": 0.22911565005779266,
550
- "eval_runtime": 133.5899,
551
- "eval_samples_per_second": 20.526,
552
- "eval_steps_per_second": 2.568,
553
- "eval_wer": 0.36871964780102573,
554
  "step": 7000
555
  },
556
  {
557
- "epoch": 68.93,
558
- "learning_rate": 3.862650602409639e-05,
559
- "loss": 0.885,
560
  "step": 7100
561
  },
562
  {
563
- "epoch": 69.9,
564
- "learning_rate": 3.742168674698796e-05,
565
- "loss": 0.8772,
566
  "step": 7200
567
  },
568
  {
569
- "epoch": 70.87,
570
- "learning_rate": 3.62289156626506e-05,
571
- "loss": 0.8798,
572
  "step": 7300
573
  },
574
  {
575
- "epoch": 71.84,
576
- "learning_rate": 3.502409638554217e-05,
577
- "loss": 0.8808,
578
  "step": 7400
579
  },
580
  {
581
- "epoch": 72.82,
582
- "learning_rate": 3.3819277108433736e-05,
583
- "loss": 0.8859,
584
  "step": 7500
585
  },
586
  {
587
- "epoch": 72.82,
588
- "eval_loss": 0.22717151045799255,
589
- "eval_runtime": 134.7148,
590
- "eval_samples_per_second": 20.354,
591
- "eval_steps_per_second": 2.546,
592
- "eval_wer": 0.3615939726773476,
593
  "step": 7500
594
  },
595
  {
596
- "epoch": 73.79,
597
- "learning_rate": 3.2614457831325304e-05,
598
- "loss": 0.8713,
599
  "step": 7600
600
  },
601
  {
602
- "epoch": 74.76,
603
- "learning_rate": 3.140963855421687e-05,
604
- "loss": 0.8734,
605
  "step": 7700
606
  },
607
  {
608
- "epoch": 75.73,
609
- "learning_rate": 3.0204819277108436e-05,
610
- "loss": 0.8565,
611
  "step": 7800
612
  },
613
  {
614
- "epoch": 76.7,
615
- "learning_rate": 2.9e-05,
616
- "loss": 0.8492,
617
  "step": 7900
618
  },
619
  {
620
- "epoch": 77.67,
621
- "learning_rate": 2.7795180722891568e-05,
622
- "loss": 0.8441,
623
  "step": 8000
624
  },
625
  {
626
- "epoch": 77.67,
627
- "eval_loss": 0.22322185337543488,
628
- "eval_runtime": 134.4634,
629
- "eval_samples_per_second": 20.392,
630
- "eval_steps_per_second": 2.551,
631
- "eval_wer": 0.35383288703308674,
632
  "step": 8000
633
  },
634
  {
635
- "epoch": 78.64,
636
- "learning_rate": 2.6590361445783136e-05,
637
- "loss": 0.8516,
638
  "step": 8100
639
  },
640
  {
641
- "epoch": 79.61,
642
- "learning_rate": 2.5385542168674696e-05,
643
- "loss": 0.8451,
644
  "step": 8200
645
  },
646
  {
647
- "epoch": 80.58,
648
- "learning_rate": 2.4180722891566264e-05,
649
- "loss": 0.8346,
650
  "step": 8300
651
  },
652
  {
653
- "epoch": 81.55,
654
- "learning_rate": 2.2975903614457832e-05,
655
- "loss": 0.8378,
656
  "step": 8400
657
  },
658
  {
659
- "epoch": 82.52,
660
- "learning_rate": 2.17710843373494e-05,
661
- "loss": 0.8284,
662
  "step": 8500
663
  },
664
  {
665
- "epoch": 82.52,
666
- "eval_loss": 0.22235004603862762,
667
- "eval_runtime": 133.9778,
668
- "eval_samples_per_second": 20.466,
669
- "eval_steps_per_second": 2.56,
670
- "eval_wer": 0.33817455634729726,
671
  "step": 8500
672
  },
673
  {
674
- "epoch": 83.5,
675
- "learning_rate": 2.0566265060240967e-05,
676
- "loss": 0.8269,
677
  "step": 8600
678
  },
679
  {
680
- "epoch": 84.47,
681
- "learning_rate": 1.936144578313253e-05,
682
- "loss": 0.8186,
683
  "step": 8700
684
  },
685
  {
686
- "epoch": 85.44,
687
- "learning_rate": 1.8156626506024096e-05,
688
- "loss": 0.8243,
689
  "step": 8800
690
  },
691
  {
692
- "epoch": 86.41,
693
- "learning_rate": 1.6951807228915663e-05,
694
- "loss": 0.8279,
695
  "step": 8900
696
  },
697
  {
698
- "epoch": 87.38,
699
- "learning_rate": 1.574698795180723e-05,
700
- "loss": 0.8142,
701
  "step": 9000
702
  },
703
  {
704
- "epoch": 87.38,
705
- "eval_loss": 0.2192818820476532,
706
- "eval_runtime": 132.2621,
707
- "eval_samples_per_second": 20.732,
708
- "eval_steps_per_second": 2.593,
709
- "eval_wer": 0.33104888122361914,
710
  "step": 9000
711
  },
712
  {
713
- "epoch": 88.35,
714
- "learning_rate": 1.4542168674698795e-05,
715
- "loss": 0.8071,
716
  "step": 9100
717
  },
718
  {
719
- "epoch": 89.32,
720
- "learning_rate": 1.3337349397590363e-05,
721
- "loss": 0.8075,
722
  "step": 9200
723
  },
724
  {
725
- "epoch": 90.29,
726
- "learning_rate": 1.2132530120481929e-05,
727
- "loss": 0.8042,
728
  "step": 9300
729
  },
730
  {
731
- "epoch": 91.26,
732
- "learning_rate": 1.0927710843373493e-05,
733
- "loss": 0.7916,
734
  "step": 9400
735
  },
736
  {
737
- "epoch": 92.23,
738
- "learning_rate": 9.722891566265061e-06,
739
- "loss": 0.8012,
740
- "step": 9500
741
- },
742
- {
743
- "epoch": 92.23,
744
- "eval_loss": 0.21682003140449524,
745
- "eval_runtime": 133.9404,
746
- "eval_samples_per_second": 20.472,
747
- "eval_steps_per_second": 2.561,
748
- "eval_wer": 0.3276448962919257,
749
- "step": 9500
750
- },
751
- {
752
- "epoch": 93.2,
753
- "learning_rate": 8.518072289156627e-06,
754
- "loss": 0.8055,
755
- "step": 9600
756
- },
757
- {
758
- "epoch": 94.17,
759
- "learning_rate": 7.313253012048194e-06,
760
- "loss": 0.7955,
761
- "step": 9700
762
- },
763
- {
764
- "epoch": 95.15,
765
- "learning_rate": 6.108433734939759e-06,
766
- "loss": 0.7961,
767
- "step": 9800
768
- },
769
- {
770
- "epoch": 96.12,
771
- "learning_rate": 4.903614457831326e-06,
772
- "loss": 0.7843,
773
- "step": 9900
774
- },
775
- {
776
- "epoch": 97.09,
777
- "learning_rate": 3.6987951807228917e-06,
778
- "loss": 0.7781,
779
- "step": 10000
780
- },
781
- {
782
- "epoch": 97.09,
783
- "eval_loss": 0.21628263592720032,
784
- "eval_runtime": 133.5255,
785
- "eval_samples_per_second": 20.535,
786
- "eval_steps_per_second": 2.569,
787
- "eval_wer": 0.3240593654972087,
788
- "step": 10000
789
- },
790
- {
791
- "epoch": 98.06,
792
- "learning_rate": 2.493975903614458e-06,
793
- "loss": 0.7842,
794
- "step": 10100
795
- },
796
- {
797
- "epoch": 99.03,
798
- "learning_rate": 1.2891566265060241e-06,
799
- "loss": 0.7821,
800
- "step": 10200
801
- },
802
- {
803
- "epoch": 100.0,
804
- "learning_rate": 9.638554216867469e-08,
805
- "loss": 0.7779,
806
- "step": 10300
807
- },
808
- {
809
- "epoch": 100.0,
810
- "step": 10300,
811
- "total_flos": 5.823193156406256e+19,
812
- "train_loss": 1.3660302423273476,
813
- "train_runtime": 26867.6077,
814
- "train_samples_per_second": 12.253,
815
- "train_steps_per_second": 0.383
816
  }
817
  ],
818
- "max_steps": 10300,
819
  "num_train_epochs": 100,
820
- "total_flos": 5.823193156406256e+19,
821
  "trial_name": null,
822
  "trial_params": null
823
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 99.994708994709,
5
+ "global_step": 9400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.06,
12
  "learning_rate": 4.950000000000001e-06,
13
+ "loss": 13.5577,
14
  "step": 100
15
  },
16
  {
17
+ "epoch": 2.13,
18
  "learning_rate": 9.950000000000001e-06,
19
+ "loss": 5.868,
20
  "step": 200
21
  },
22
  {
23
+ "epoch": 3.19,
24
  "learning_rate": 1.4950000000000001e-05,
25
+ "loss": 4.1355,
26
  "step": 300
27
  },
28
  {
29
+ "epoch": 4.25,
30
  "learning_rate": 1.995e-05,
31
+ "loss": 3.5988,
32
  "step": 400
33
  },
34
  {
35
+ "epoch": 5.32,
36
  "learning_rate": 2.495e-05,
37
+ "loss": 3.3036,
38
  "step": 500
39
  },
40
  {
41
+ "epoch": 5.32,
42
+ "eval_loss": 3.262770652770996,
43
+ "eval_runtime": 136.0729,
44
+ "eval_samples_per_second": 20.166,
45
+ "eval_steps_per_second": 2.521,
46
  "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
+ "epoch": 6.38,
51
  "learning_rate": 2.995e-05,
52
+ "loss": 3.2243,
53
  "step": 600
54
  },
55
  {
56
+ "epoch": 7.44,
57
  "learning_rate": 3.495e-05,
58
+ "loss": 3.1823,
59
  "step": 700
60
  },
61
  {
62
+ "epoch": 8.51,
63
  "learning_rate": 3.995e-05,
64
+ "loss": 3.1371,
65
  "step": 800
66
  },
67
  {
68
+ "epoch": 9.57,
69
  "learning_rate": 4.495e-05,
70
+ "loss": 3.0942,
71
  "step": 900
72
  },
73
  {
74
+ "epoch": 10.63,
75
  "learning_rate": 4.995e-05,
76
+ "loss": 2.9734,
77
  "step": 1000
78
  },
79
  {
80
+ "epoch": 10.63,
81
+ "eval_loss": 2.5676724910736084,
82
+ "eval_runtime": 135.3086,
83
+ "eval_samples_per_second": 20.28,
84
+ "eval_steps_per_second": 2.535,
85
+ "eval_wer": 0.9980321266761246,
86
  "step": 1000
87
  },
88
  {
89
+ "epoch": 11.7,
90
  "learning_rate": 5.495e-05,
91
+ "loss": 2.2815,
92
  "step": 1100
93
  },
94
  {
95
+ "epoch": 12.76,
96
  "learning_rate": 5.995000000000001e-05,
97
+ "loss": 1.6698,
98
  "step": 1200
99
  },
100
  {
101
+ "epoch": 13.83,
102
  "learning_rate": 6.494999999999999e-05,
103
+ "loss": 1.4895,
104
  "step": 1300
105
  },
106
  {
107
+ "epoch": 14.89,
108
  "learning_rate": 6.995e-05,
109
+ "loss": 1.3959,
110
  "step": 1400
111
  },
112
  {
113
+ "epoch": 15.95,
114
  "learning_rate": 7.495e-05,
115
+ "loss": 1.3466,
116
  "step": 1500
117
  },
118
  {
119
+ "epoch": 15.95,
120
+ "eval_loss": 0.44553351402282715,
121
+ "eval_runtime": 134.1872,
122
+ "eval_samples_per_second": 20.449,
123
+ "eval_steps_per_second": 2.556,
124
+ "eval_wer": 0.6306347535581895,
125
  "step": 1500
126
  },
127
  {
128
+ "epoch": 17.02,
129
  "learning_rate": 7.995e-05,
130
+ "loss": 1.3288,
131
  "step": 1600
132
  },
133
  {
134
+ "epoch": 18.08,
135
  "learning_rate": 8.495e-05,
136
+ "loss": 1.2866,
137
  "step": 1700
138
  },
139
  {
140
+ "epoch": 19.15,
141
  "learning_rate": 8.995e-05,
142
+ "loss": 1.2619,
143
  "step": 1800
144
  },
145
  {
146
+ "epoch": 20.21,
147
  "learning_rate": 9.495e-05,
148
+ "loss": 1.2496,
149
  "step": 1900
150
  },
151
  {
152
+ "epoch": 21.28,
153
+ "learning_rate": 9.99e-05,
154
+ "loss": 1.2424,
155
  "step": 2000
156
  },
157
  {
158
+ "epoch": 21.28,
159
+ "eval_loss": 0.3603059649467468,
160
+ "eval_runtime": 134.7251,
161
+ "eval_samples_per_second": 20.367,
162
+ "eval_steps_per_second": 2.546,
163
+ "eval_wer": 0.5301359205528351,
164
  "step": 2000
165
  },
166
  {
167
+ "epoch": 22.34,
168
+ "learning_rate": 9.867567567567569e-05,
169
+ "loss": 1.2253,
170
  "step": 2100
171
  },
172
  {
173
+ "epoch": 23.4,
174
+ "learning_rate": 9.732432432432433e-05,
175
+ "loss": 1.209,
176
  "step": 2200
177
  },
178
  {
179
+ "epoch": 24.47,
180
+ "learning_rate": 9.597297297297298e-05,
181
+ "loss": 1.1984,
182
  "step": 2300
183
  },
184
  {
185
+ "epoch": 25.53,
186
+ "learning_rate": 9.462162162162162e-05,
187
+ "loss": 1.1735,
188
  "step": 2400
189
  },
190
  {
191
+ "epoch": 26.59,
192
+ "learning_rate": 9.327027027027028e-05,
193
+ "loss": 1.1655,
194
  "step": 2500
195
  },
196
  {
197
+ "epoch": 26.59,
198
+ "eval_loss": 0.3164927661418915,
199
+ "eval_runtime": 135.1028,
200
+ "eval_samples_per_second": 20.31,
201
+ "eval_steps_per_second": 2.539,
202
+ "eval_wer": 0.4739828840785319,
203
  "step": 2500
204
  },
205
  {
206
+ "epoch": 27.66,
207
+ "learning_rate": 9.191891891891893e-05,
208
+ "loss": 1.1529,
209
  "step": 2600
210
  },
211
  {
212
+ "epoch": 28.72,
213
+ "learning_rate": 9.056756756756757e-05,
214
+ "loss": 1.1408,
215
  "step": 2700
216
  },
217
  {
218
+ "epoch": 29.78,
219
+ "learning_rate": 8.921621621621622e-05,
220
+ "loss": 1.1201,
221
  "step": 2800
222
  },
223
  {
224
+ "epoch": 30.85,
225
+ "learning_rate": 8.787837837837838e-05,
226
+ "loss": 1.1108,
227
  "step": 2900
228
  },
229
  {
230
+ "epoch": 31.91,
231
+ "learning_rate": 8.652702702702703e-05,
232
+ "loss": 1.1026,
233
  "step": 3000
234
  },
235
  {
236
+ "epoch": 31.91,
237
+ "eval_loss": 0.2930183410644531,
238
+ "eval_runtime": 135.7738,
239
+ "eval_samples_per_second": 20.21,
240
+ "eval_steps_per_second": 2.526,
241
+ "eval_wer": 0.4400256281177063,
242
  "step": 3000
243
  },
244
  {
245
+ "epoch": 32.97,
246
+ "learning_rate": 8.517567567567568e-05,
247
+ "loss": 1.1035,
248
  "step": 3100
249
  },
250
  {
251
+ "epoch": 34.04,
252
+ "learning_rate": 8.382432432432433e-05,
253
+ "loss": 1.0976,
254
  "step": 3200
255
  },
256
  {
257
+ "epoch": 35.11,
258
+ "learning_rate": 8.247297297297298e-05,
259
+ "loss": 1.0717,
260
  "step": 3300
261
  },
262
  {
263
+ "epoch": 36.17,
264
+ "learning_rate": 8.112162162162162e-05,
265
+ "loss": 1.0778,
266
  "step": 3400
267
  },
268
  {
269
+ "epoch": 37.23,
270
+ "learning_rate": 7.977027027027028e-05,
271
+ "loss": 1.0655,
272
  "step": 3500
273
  },
274
  {
275
+ "epoch": 37.23,
276
+ "eval_loss": 0.26754099130630493,
277
+ "eval_runtime": 134.1895,
278
+ "eval_samples_per_second": 20.449,
279
+ "eval_steps_per_second": 2.556,
280
+ "eval_wer": 0.41590773877625736,
281
  "step": 3500
282
  },
283
  {
284
+ "epoch": 38.3,
285
+ "learning_rate": 7.841891891891892e-05,
286
+ "loss": 1.0588,
287
  "step": 3600
288
  },
289
  {
290
+ "epoch": 39.36,
291
+ "learning_rate": 7.706756756756757e-05,
292
+ "loss": 1.0401,
293
  "step": 3700
294
  },
295
  {
296
+ "epoch": 40.42,
297
+ "learning_rate": 7.571621621621621e-05,
298
+ "loss": 1.0296,
299
  "step": 3800
300
  },
301
  {
302
+ "epoch": 41.49,
303
+ "learning_rate": 7.436486486486487e-05,
304
+ "loss": 1.0218,
305
  "step": 3900
306
  },
307
  {
308
+ "epoch": 42.55,
309
+ "learning_rate": 7.301351351351352e-05,
310
+ "loss": 1.0239,
311
  "step": 4000
312
  },
313
  {
314
+ "epoch": 42.55,
315
+ "eval_loss": 0.25800037384033203,
316
+ "eval_runtime": 134.1224,
317
+ "eval_samples_per_second": 20.459,
318
+ "eval_steps_per_second": 2.557,
319
+ "eval_wer": 0.3912864399798636,
320
  "step": 4000
321
  },
322
  {
323
+ "epoch": 43.61,
324
+ "learning_rate": 7.166216216216216e-05,
325
+ "loss": 1.0226,
326
  "step": 4100
327
  },
328
  {
329
+ "epoch": 44.68,
330
+ "learning_rate": 7.031081081081081e-05,
331
+ "loss": 1.0165,
332
  "step": 4200
333
  },
334
  {
335
+ "epoch": 45.74,
336
+ "learning_rate": 6.895945945945947e-05,
337
+ "loss": 1.0083,
338
  "step": 4300
339
  },
340
  {
341
+ "epoch": 46.8,
342
+ "learning_rate": 6.760810810810811e-05,
343
+ "loss": 0.9981,
344
  "step": 4400
345
  },
346
  {
347
+ "epoch": 47.87,
348
+ "learning_rate": 6.625675675675676e-05,
349
+ "loss": 0.9938,
350
  "step": 4500
351
  },
352
  {
353
+ "epoch": 47.87,
354
+ "eval_loss": 0.23732751607894897,
355
+ "eval_runtime": 135.9562,
356
+ "eval_samples_per_second": 20.183,
357
+ "eval_steps_per_second": 2.523,
358
+ "eval_wer": 0.36977712690494713,
359
  "step": 4500
360
  },
361
  {
362
+ "epoch": 48.93,
363
+ "learning_rate": 6.49054054054054e-05,
364
+ "loss": 0.9717,
365
  "step": 4600
366
  },
367
  {
368
+ "epoch": 49.99,
369
+ "learning_rate": 6.355405405405406e-05,
370
+ "loss": 0.9795,
371
  "step": 4700
372
  },
373
  {
374
+ "epoch": 51.06,
375
+ "learning_rate": 6.22027027027027e-05,
376
+ "loss": 0.9804,
377
  "step": 4800
378
  },
379
  {
380
+ "epoch": 52.13,
381
+ "learning_rate": 6.085135135135135e-05,
382
+ "loss": 0.9655,
383
  "step": 4900
384
  },
385
  {
386
+ "epoch": 53.19,
387
+ "learning_rate": 5.95e-05,
388
+ "loss": 0.9655,
389
  "step": 5000
390
  },
391
  {
392
+ "epoch": 53.19,
393
+ "eval_loss": 0.2379022240638733,
394
+ "eval_runtime": 134.3498,
395
+ "eval_samples_per_second": 20.424,
396
+ "eval_steps_per_second": 2.553,
397
+ "eval_wer": 0.3674889021097433,
398
  "step": 5000
399
  },
400
  {
401
+ "epoch": 54.25,
402
+ "learning_rate": 5.8148648648648655e-05,
403
+ "loss": 0.9616,
404
  "step": 5100
405
  },
406
  {
407
+ "epoch": 55.32,
408
+ "learning_rate": 5.67972972972973e-05,
409
+ "loss": 0.9633,
410
  "step": 5200
411
  },
412
  {
413
+ "epoch": 56.38,
414
+ "learning_rate": 5.544594594594595e-05,
415
+ "loss": 0.94,
416
  "step": 5300
417
  },
418
  {
419
+ "epoch": 57.44,
420
+ "learning_rate": 5.40945945945946e-05,
421
+ "loss": 0.9355,
422
  "step": 5400
423
  },
424
  {
425
+ "epoch": 58.51,
426
+ "learning_rate": 5.274324324324325e-05,
427
+ "loss": 0.9374,
428
  "step": 5500
429
  },
430
  {
431
+ "epoch": 58.51,
432
+ "eval_loss": 0.24859154224395752,
433
+ "eval_runtime": 135.3597,
434
+ "eval_samples_per_second": 20.272,
435
+ "eval_steps_per_second": 2.534,
436
+ "eval_wer": 0.3794792000366116,
437
  "step": 5500
438
  },
439
  {
440
+ "epoch": 59.57,
441
+ "learning_rate": 5.1391891891891894e-05,
442
+ "loss": 0.93,
443
  "step": 5600
444
  },
445
  {
446
+ "epoch": 60.63,
447
+ "learning_rate": 5.0040540540540546e-05,
448
+ "loss": 0.9212,
449
  "step": 5700
450
  },
451
  {
452
+ "epoch": 61.7,
453
+ "learning_rate": 4.868918918918919e-05,
454
+ "loss": 0.9233,
455
  "step": 5800
456
  },
457
  {
458
+ "epoch": 62.76,
459
+ "learning_rate": 4.733783783783784e-05,
460
+ "loss": 0.914,
461
  "step": 5900
462
  },
463
  {
464
+ "epoch": 63.83,
465
+ "learning_rate": 4.598648648648649e-05,
466
+ "loss": 0.9065,
467
  "step": 6000
468
  },
469
  {
470
+ "epoch": 63.83,
471
+ "eval_loss": 0.22428132593631744,
472
+ "eval_runtime": 136.639,
473
+ "eval_samples_per_second": 20.082,
474
+ "eval_steps_per_second": 2.51,
475
+ "eval_wer": 0.3405336140222415,
476
  "step": 6000
477
  },
478
  {
479
+ "epoch": 64.89,
480
+ "learning_rate": 4.463513513513514e-05,
481
+ "loss": 0.9023,
482
  "step": 6100
483
  },
484
  {
485
+ "epoch": 65.95,
486
+ "learning_rate": 4.3283783783783785e-05,
487
+ "loss": 0.9105,
488
  "step": 6200
489
  },
490
  {
491
+ "epoch": 67.02,
492
+ "learning_rate": 4.193243243243244e-05,
493
+ "loss": 0.907,
494
  "step": 6300
495
  },
496
  {
497
+ "epoch": 68.08,
498
+ "learning_rate": 4.058108108108108e-05,
499
+ "loss": 0.8939,
500
  "step": 6400
501
  },
502
  {
503
+ "epoch": 69.15,
504
+ "learning_rate": 3.9229729729729734e-05,
505
+ "loss": 0.888,
506
  "step": 6500
507
  },
508
  {
509
+ "epoch": 69.15,
510
+ "eval_loss": 0.21568605303764343,
511
+ "eval_runtime": 139.7942,
512
+ "eval_samples_per_second": 19.629,
513
+ "eval_steps_per_second": 2.454,
514
+ "eval_wer": 0.3277195551690998,
515
  "step": 6500
516
  },
517
  {
518
+ "epoch": 70.21,
519
+ "learning_rate": 3.78918918918919e-05,
520
+ "loss": 0.8866,
521
  "step": 6600
522
  },
523
  {
524
+ "epoch": 71.28,
525
+ "learning_rate": 3.654054054054054e-05,
526
+ "loss": 0.8881,
527
  "step": 6700
528
  },
529
  {
530
+ "epoch": 72.34,
531
+ "learning_rate": 3.5189189189189195e-05,
532
+ "loss": 0.8849,
533
  "step": 6800
534
  },
535
  {
536
+ "epoch": 73.4,
537
+ "learning_rate": 3.383783783783784e-05,
538
+ "loss": 0.8648,
539
  "step": 6900
540
  },
541
  {
542
+ "epoch": 74.47,
543
+ "learning_rate": 3.2500000000000004e-05,
544
+ "loss": 0.8646,
545
  "step": 7000
546
  },
547
  {
548
+ "epoch": 74.47,
549
+ "eval_loss": 0.21030458807945251,
550
+ "eval_runtime": 135.9762,
551
+ "eval_samples_per_second": 20.18,
552
+ "eval_steps_per_second": 2.522,
553
+ "eval_wer": 0.3287721385748936,
554
  "step": 7000
555
  },
556
  {
557
+ "epoch": 75.53,
558
+ "learning_rate": 3.114864864864865e-05,
559
+ "loss": 0.8767,
560
  "step": 7100
561
  },
562
  {
563
+ "epoch": 76.59,
564
+ "learning_rate": 2.97972972972973e-05,
565
+ "loss": 0.8655,
566
  "step": 7200
567
  },
568
  {
569
+ "epoch": 77.66,
570
+ "learning_rate": 2.8445945945945946e-05,
571
+ "loss": 0.8672,
572
  "step": 7300
573
  },
574
  {
575
+ "epoch": 78.72,
576
+ "learning_rate": 2.7094594594594598e-05,
577
+ "loss": 0.8625,
578
  "step": 7400
579
  },
580
  {
581
+ "epoch": 79.78,
582
+ "learning_rate": 2.5743243243243243e-05,
583
+ "loss": 0.8602,
584
  "step": 7500
585
  },
586
  {
587
+ "epoch": 79.78,
588
+ "eval_loss": 0.20880180597305298,
589
+ "eval_runtime": 135.4639,
590
+ "eval_samples_per_second": 20.256,
591
+ "eval_steps_per_second": 2.532,
592
+ "eval_wer": 0.32378380852134914,
593
  "step": 7500
594
  },
595
  {
596
+ "epoch": 80.85,
597
+ "learning_rate": 2.4391891891891895e-05,
598
+ "loss": 0.8515,
599
  "step": 7600
600
  },
601
  {
602
+ "epoch": 81.91,
603
+ "learning_rate": 2.3040540540540543e-05,
604
+ "loss": 0.8554,
605
  "step": 7700
606
  },
607
  {
608
+ "epoch": 82.97,
609
+ "learning_rate": 2.1689189189189192e-05,
610
+ "loss": 0.8563,
611
  "step": 7800
612
  },
613
  {
614
+ "epoch": 84.04,
615
+ "learning_rate": 2.033783783783784e-05,
616
+ "loss": 0.8655,
617
  "step": 7900
618
  },
619
  {
620
+ "epoch": 85.11,
621
+ "learning_rate": 1.898648648648649e-05,
622
+ "loss": 0.8442,
623
  "step": 8000
624
  },
625
  {
626
+ "epoch": 85.11,
627
+ "eval_loss": 0.2045026570558548,
628
+ "eval_runtime": 138.8412,
629
+ "eval_samples_per_second": 19.764,
630
+ "eval_steps_per_second": 2.47,
631
+ "eval_wer": 0.32657544277149786,
632
  "step": 8000
633
  },
634
  {
635
+ "epoch": 86.17,
636
+ "learning_rate": 1.7635135135135137e-05,
637
+ "loss": 0.8323,
638
  "step": 8100
639
  },
640
  {
641
+ "epoch": 87.23,
642
+ "learning_rate": 1.6283783783783786e-05,
643
+ "loss": 0.8384,
644
  "step": 8200
645
  },
646
  {
647
+ "epoch": 88.3,
648
+ "learning_rate": 1.4932432432432433e-05,
649
+ "loss": 0.8391,
650
  "step": 8300
651
  },
652
  {
653
+ "epoch": 89.36,
654
+ "learning_rate": 1.3581081081081081e-05,
655
+ "loss": 0.8292,
656
  "step": 8400
657
  },
658
  {
659
+ "epoch": 90.42,
660
+ "learning_rate": 1.222972972972973e-05,
661
+ "loss": 0.8335,
662
  "step": 8500
663
  },
664
  {
665
+ "epoch": 90.42,
666
+ "eval_loss": 0.20376762747764587,
667
+ "eval_runtime": 136.8377,
668
+ "eval_samples_per_second": 20.053,
669
+ "eval_steps_per_second": 2.507,
670
+ "eval_wer": 0.3240583954967736,
671
  "step": 8500
672
  },
673
  {
674
+ "epoch": 91.49,
675
+ "learning_rate": 1.0878378378378378e-05,
676
+ "loss": 0.8314,
677
  "step": 8600
678
  },
679
  {
680
+ "epoch": 92.55,
681
+ "learning_rate": 9.527027027027027e-06,
682
+ "loss": 0.8254,
683
  "step": 8700
684
  },
685
  {
686
+ "epoch": 93.61,
687
+ "learning_rate": 8.175675675675675e-06,
688
+ "loss": 0.8231,
689
  "step": 8800
690
  },
691
  {
692
+ "epoch": 94.68,
693
+ "learning_rate": 6.8243243243243244e-06,
694
+ "loss": 0.8164,
695
  "step": 8900
696
  },
697
  {
698
+ "epoch": 95.74,
699
+ "learning_rate": 5.472972972972974e-06,
700
+ "loss": 0.8288,
701
  "step": 9000
702
  },
703
  {
704
+ "epoch": 95.74,
705
+ "eval_loss": 0.20242640376091003,
706
+ "eval_runtime": 135.0305,
707
+ "eval_samples_per_second": 20.321,
708
+ "eval_steps_per_second": 2.54,
709
+ "eval_wer": 0.32799414214452427,
710
  "step": 9000
711
  },
712
  {
713
+ "epoch": 96.8,
714
+ "learning_rate": 4.121621621621622e-06,
715
+ "loss": 0.816,
716
  "step": 9100
717
  },
718
  {
719
+ "epoch": 97.87,
720
+ "learning_rate": 2.7702702702702708e-06,
721
+ "loss": 0.8163,
722
  "step": 9200
723
  },
724
  {
725
+ "epoch": 98.93,
726
+ "learning_rate": 1.418918918918919e-06,
727
+ "loss": 0.8126,
728
  "step": 9300
729
  },
730
  {
731
+ "epoch": 99.99,
732
+ "learning_rate": 6.756756756756757e-08,
733
+ "loss": 0.8084,
734
  "step": 9400
735
  },
736
  {
737
+ "epoch": 99.99,
738
+ "step": 9400,
739
+ "total_flos": 1.0839754269306731e+20,
740
+ "train_loss": 1.3946523244330225,
741
+ "train_runtime": 41836.5581,
742
+ "train_samples_per_second": 14.423,
743
+ "train_steps_per_second": 0.225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
744
  }
745
  ],
746
+ "max_steps": 9400,
747
  "num_train_epochs": 100,
748
+ "total_flos": 1.0839754269306731e+20,
749
  "trial_name": null,
750
  "trial_params": null
751
  }