|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 25780, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00024390399999999997, |
|
"loss": 10.7678, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 3.0923821926116943, |
|
"eval_runtime": 369.7958, |
|
"eval_samples_per_second": 14.681, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0004113554106910039, |
|
"loss": 1.4821, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.4990185499191284, |
|
"eval_runtime": 398.0715, |
|
"eval_samples_per_second": 13.638, |
|
"eval_wer": 0.6425968693867077, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0004093410691003911, |
|
"loss": 0.4925, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 0.3599244952201843, |
|
"eval_runtime": 395.2724, |
|
"eval_samples_per_second": 13.735, |
|
"eval_wer": 0.522966384398255, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00040732672750977834, |
|
"loss": 0.3926, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 0.27538031339645386, |
|
"eval_runtime": 370.4342, |
|
"eval_samples_per_second": 14.656, |
|
"eval_wer": 0.4267898383371824, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00040531238591916556, |
|
"loss": 0.3404, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 0.25353798270225525, |
|
"eval_runtime": 396.0187, |
|
"eval_samples_per_second": 13.709, |
|
"eval_wer": 0.3899409802412112, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0004032980443285528, |
|
"loss": 0.3148, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 0.23486939072608948, |
|
"eval_runtime": 394.9332, |
|
"eval_samples_per_second": 13.747, |
|
"eval_wer": 0.38388503977418525, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00040128370273794, |
|
"loss": 0.2853, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 0.20920990407466888, |
|
"eval_runtime": 393.4003, |
|
"eval_samples_per_second": 13.8, |
|
"eval_wer": 0.32840646651270206, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00039926936114732727, |
|
"loss": 0.2404, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"eval_loss": 0.19739878177642822, |
|
"eval_runtime": 367.4384, |
|
"eval_samples_per_second": 14.775, |
|
"eval_wer": 0.3072106748781114, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00039725501955671443, |
|
"loss": 0.2325, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 0.19151932001113892, |
|
"eval_runtime": 398.5555, |
|
"eval_samples_per_second": 13.622, |
|
"eval_wer": 0.3065434949961509, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00039524067796610165, |
|
"loss": 0.2325, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 0.18169228732585907, |
|
"eval_runtime": 393.6421, |
|
"eval_samples_per_second": 13.792, |
|
"eval_wer": 0.2896587118296125, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00039322633637548887, |
|
"loss": 0.2271, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 0.1825951784849167, |
|
"eval_runtime": 368.1876, |
|
"eval_samples_per_second": 14.745, |
|
"eval_wer": 0.29001796253528356, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00039121199478487614, |
|
"loss": 0.2263, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 0.1739877462387085, |
|
"eval_runtime": 397.804, |
|
"eval_samples_per_second": 13.647, |
|
"eval_wer": 0.2704131383115217, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00038919765319426336, |
|
"loss": 0.2187, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 0.16721388697624207, |
|
"eval_runtime": 396.6468, |
|
"eval_samples_per_second": 13.687, |
|
"eval_wer": 0.2596869386707724, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0003871833116036506, |
|
"loss": 0.2015, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 0.1631636619567871, |
|
"eval_runtime": 391.6354, |
|
"eval_samples_per_second": 13.862, |
|
"eval_wer": 0.25517064408519374, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0003851689700130378, |
|
"loss": 0.1792, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 0.17116306722164154, |
|
"eval_runtime": 368.9065, |
|
"eval_samples_per_second": 14.716, |
|
"eval_wer": 0.26646138054914037, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.000383154628422425, |
|
"loss": 0.1773, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 0.16152945160865784, |
|
"eval_runtime": 398.4133, |
|
"eval_samples_per_second": 13.627, |
|
"eval_wer": 0.24146779574031307, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00038114028683181223, |
|
"loss": 0.179, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 0.15521417558193207, |
|
"eval_runtime": 396.2358, |
|
"eval_samples_per_second": 13.701, |
|
"eval_wer": 0.24613805491403642, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.00037912594524119945, |
|
"loss": 0.1717, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 0.15384772419929504, |
|
"eval_runtime": 393.2934, |
|
"eval_samples_per_second": 13.804, |
|
"eval_wer": 0.2374133949191686, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.0003771116036505867, |
|
"loss": 0.1744, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 0.14799486100673676, |
|
"eval_runtime": 370.8447, |
|
"eval_samples_per_second": 14.64, |
|
"eval_wer": 0.23828586091865536, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.0003750972620599739, |
|
"loss": 0.166, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 0.1445951759815216, |
|
"eval_runtime": 393.1105, |
|
"eval_samples_per_second": 13.81, |
|
"eval_wer": 0.22992045162945857, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 0.0003730829204693611, |
|
"loss": 0.1532, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_loss": 0.1482468694448471, |
|
"eval_runtime": 395.8029, |
|
"eval_samples_per_second": 13.716, |
|
"eval_wer": 0.22037464716448552, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.00037106857887874833, |
|
"loss": 0.142, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 0.15104271471500397, |
|
"eval_runtime": 369.3329, |
|
"eval_samples_per_second": 14.699, |
|
"eval_wer": 0.228175519630485, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 0.0003690542372881356, |
|
"loss": 0.1406, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_loss": 0.1345045417547226, |
|
"eval_runtime": 401.0837, |
|
"eval_samples_per_second": 13.536, |
|
"eval_wer": 0.20795483705414422, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.0003670398956975228, |
|
"loss": 0.1458, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"eval_loss": 0.13937360048294067, |
|
"eval_runtime": 390.779, |
|
"eval_samples_per_second": 13.893, |
|
"eval_wer": 0.2111367718758019, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 0.00036502555410691004, |
|
"loss": 0.1449, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 0.14066852629184723, |
|
"eval_runtime": 395.9112, |
|
"eval_samples_per_second": 13.713, |
|
"eval_wer": 0.20846805234795998, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 0.00036301121251629726, |
|
"loss": 0.137, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"eval_loss": 0.1394476294517517, |
|
"eval_runtime": 367.4977, |
|
"eval_samples_per_second": 14.773, |
|
"eval_wer": 0.20744162176032846, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 0.0003609968709256845, |
|
"loss": 0.1388, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 0.1351165473461151, |
|
"eval_runtime": 397.2254, |
|
"eval_samples_per_second": 13.667, |
|
"eval_wer": 0.19964074929432898, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.0003589825293350717, |
|
"loss": 0.1307, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"eval_loss": 0.1401081681251526, |
|
"eval_runtime": 396.6542, |
|
"eval_samples_per_second": 13.687, |
|
"eval_wer": 0.20446497305619707, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.0003569681877444589, |
|
"loss": 0.122, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"eval_loss": 0.14438064396381378, |
|
"eval_runtime": 389.1513, |
|
"eval_samples_per_second": 13.951, |
|
"eval_wer": 0.20369515011547343, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 0.0003549538461538462, |
|
"loss": 0.1193, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"eval_loss": 0.14555728435516357, |
|
"eval_runtime": 367.57, |
|
"eval_samples_per_second": 14.77, |
|
"eval_wer": 0.21524249422632794, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 0.00035293950456323335, |
|
"loss": 0.1344, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 0.1305903196334839, |
|
"eval_runtime": 395.5909, |
|
"eval_samples_per_second": 13.724, |
|
"eval_wer": 0.19522709776751349, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 0.00035092516297262057, |
|
"loss": 0.1267, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"eval_loss": 0.1303185522556305, |
|
"eval_runtime": 392.9371, |
|
"eval_samples_per_second": 13.816, |
|
"eval_wer": 0.18963305106492173, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 0.0003489108213820078, |
|
"loss": 0.1214, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_loss": 0.1294514238834381, |
|
"eval_runtime": 369.1477, |
|
"eval_samples_per_second": 14.707, |
|
"eval_wer": 0.19240441365152683, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 0.00034689647979139506, |
|
"loss": 0.1179, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 0.12323788553476334, |
|
"eval_runtime": 391.972, |
|
"eval_samples_per_second": 13.85, |
|
"eval_wer": 0.19183987682832948, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 0.0003448821382007823, |
|
"loss": 0.0998, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 0.12908631563186646, |
|
"eval_runtime": 393.594, |
|
"eval_samples_per_second": 13.793, |
|
"eval_wer": 0.18516807800872467, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 0.0003428677966101695, |
|
"loss": 0.1017, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 0.1282510757446289, |
|
"eval_runtime": 393.5495, |
|
"eval_samples_per_second": 13.795, |
|
"eval_wer": 0.1841929689504747, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"learning_rate": 0.00034085345501955666, |
|
"loss": 0.1024, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 5.38, |
|
"eval_loss": 0.12217988073825836, |
|
"eval_runtime": 368.9647, |
|
"eval_samples_per_second": 14.714, |
|
"eval_wer": 0.1739286630741596, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 0.00033883911342894393, |
|
"loss": 0.1042, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"eval_loss": 0.1310078352689743, |
|
"eval_runtime": 394.8352, |
|
"eval_samples_per_second": 13.75, |
|
"eval_wer": 0.18137028483448805, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 0.00033682477183833115, |
|
"loss": 0.1096, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"eval_loss": 0.12405824661254883, |
|
"eval_runtime": 392.553, |
|
"eval_samples_per_second": 13.83, |
|
"eval_wer": 0.17890685142417245, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 0.0003348171447196871, |
|
"loss": 0.1091, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_loss": 0.12054669111967087, |
|
"eval_runtime": 396.4385, |
|
"eval_samples_per_second": 13.694, |
|
"eval_wer": 0.17942006671798819, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 0.0003328028031290743, |
|
"loss": 0.1039, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_loss": 0.11941063404083252, |
|
"eval_runtime": 368.032, |
|
"eval_samples_per_second": 14.751, |
|
"eval_wer": 0.17829099307159355, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 0.0003307884615384615, |
|
"loss": 0.0963, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"eval_loss": 0.13149844110012054, |
|
"eval_runtime": 395.5282, |
|
"eval_samples_per_second": 13.726, |
|
"eval_wer": 0.17823967154221196, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 0.00032877411994784874, |
|
"loss": 0.0947, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"eval_loss": 0.11894866079092026, |
|
"eval_runtime": 394.9332, |
|
"eval_samples_per_second": 13.747, |
|
"eval_wer": 0.16925840390043623, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00032675977835723596, |
|
"loss": 0.0946, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 0.12667880952358246, |
|
"eval_runtime": 365.722, |
|
"eval_samples_per_second": 14.845, |
|
"eval_wer": 0.1762894534257121, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 0.0003247454367666232, |
|
"loss": 0.0935, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"eval_loss": 0.12555569410324097, |
|
"eval_runtime": 401.4704, |
|
"eval_samples_per_second": 13.523, |
|
"eval_wer": 0.1729022324865281, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"learning_rate": 0.00032273780964797916, |
|
"loss": 0.088, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 6.69, |
|
"eval_loss": 0.12047957628965378, |
|
"eval_runtime": 389.5023, |
|
"eval_samples_per_second": 13.938, |
|
"eval_wer": 0.16546061072619964, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 0.00032072346805736633, |
|
"loss": 0.0947, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"eval_loss": 0.1205919086933136, |
|
"eval_runtime": 397.9241, |
|
"eval_samples_per_second": 13.643, |
|
"eval_wer": 0.17202976648704132, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.00031870912646675355, |
|
"loss": 0.0897, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_loss": 0.1126471757888794, |
|
"eval_runtime": 364.5953, |
|
"eval_samples_per_second": 14.89, |
|
"eval_wer": 0.16556325378496278, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 0.00031669478487614077, |
|
"loss": 0.0853, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"eval_loss": 0.1229565218091011, |
|
"eval_runtime": 386.8026, |
|
"eval_samples_per_second": 14.036, |
|
"eval_wer": 0.1657685399024891, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 0.00031468044328552804, |
|
"loss": 0.0844, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"eval_loss": 0.1236405223608017, |
|
"eval_runtime": 399.2408, |
|
"eval_samples_per_second": 13.598, |
|
"eval_wer": 0.15991788555298947, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 0.00031266610169491526, |
|
"loss": 0.0836, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"eval_loss": 0.12277714908123016, |
|
"eval_runtime": 395.0189, |
|
"eval_samples_per_second": 13.744, |
|
"eval_wer": 0.17233769566333076, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 0.00016603860759493668, |
|
"loss": 0.0827, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"eval_loss": 0.11456963419914246, |
|
"eval_runtime": 391.5783, |
|
"eval_samples_per_second": 13.864, |
|
"eval_wer": 0.14801129073646394, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.00016114936708860758, |
|
"loss": 0.0774, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"eval_loss": 0.10517676174640656, |
|
"eval_runtime": 387.996, |
|
"eval_samples_per_second": 13.992, |
|
"eval_wer": 0.14251988709263536, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.00015626012658227848, |
|
"loss": 0.0662, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"eval_loss": 0.10210742056369781, |
|
"eval_runtime": 393.9886, |
|
"eval_samples_per_second": 13.78, |
|
"eval_wer": 0.13456505003849115, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 0.00015138718354430379, |
|
"loss": 0.0709, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 0.09889358282089233, |
|
"eval_runtime": 373.1692, |
|
"eval_samples_per_second": 14.548, |
|
"eval_wer": 0.13451372850910956, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 0.00014649794303797466, |
|
"loss": 0.068, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"eval_loss": 0.09926428645849228, |
|
"eval_runtime": 387.0555, |
|
"eval_samples_per_second": 14.026, |
|
"eval_wer": 0.1367718758018989, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 0.00014160870253164559, |
|
"loss": 0.0641, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"eval_loss": 0.09896103292703629, |
|
"eval_runtime": 394.68, |
|
"eval_samples_per_second": 13.755, |
|
"eval_wer": 0.12948421862971515, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 0.00013671946202531646, |
|
"loss": 0.0633, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"eval_loss": 0.09418534487485886, |
|
"eval_runtime": 389.6282, |
|
"eval_samples_per_second": 13.934, |
|
"eval_wer": 0.12799589427764949, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 0.00013183022151898733, |
|
"loss": 0.0662, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"eval_loss": 0.09439662098884583, |
|
"eval_runtime": 385.8863, |
|
"eval_samples_per_second": 14.069, |
|
"eval_wer": 0.12553246086733386, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 0.00012694098101265823, |
|
"loss": 0.0626, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"eval_loss": 0.09515263140201569, |
|
"eval_runtime": 386.6219, |
|
"eval_samples_per_second": 14.042, |
|
"eval_wer": 0.12199127534000513, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"learning_rate": 0.00012205174050632911, |
|
"loss": 0.0554, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 7.1, |
|
"eval_loss": 0.09619747847318649, |
|
"eval_runtime": 388.8787, |
|
"eval_samples_per_second": 13.961, |
|
"eval_wer": 0.12158070310495253, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 0.0001171625, |
|
"loss": 0.0532, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 0.10246041417121887, |
|
"eval_runtime": 392.3678, |
|
"eval_samples_per_second": 13.837, |
|
"eval_wer": 0.12178598922247882, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 0.00011228955696202532, |
|
"loss": 0.0522, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"eval_loss": 0.09738663583993912, |
|
"eval_runtime": 387.5929, |
|
"eval_samples_per_second": 14.007, |
|
"eval_wer": 0.1191172696946369, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 0.0001074003164556962, |
|
"loss": 0.0514, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"eval_loss": 0.096110999584198, |
|
"eval_runtime": 385.6979, |
|
"eval_samples_per_second": 14.076, |
|
"eval_wer": 0.12045162945855786, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"learning_rate": 0.00010251107594936708, |
|
"loss": 0.0507, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 7.56, |
|
"eval_loss": 0.09606499969959259, |
|
"eval_runtime": 388.8775, |
|
"eval_samples_per_second": 13.961, |
|
"eval_wer": 0.11778290993071594, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 9.76381329113924e-05, |
|
"loss": 0.0512, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_loss": 0.0907408595085144, |
|
"eval_runtime": 369.8847, |
|
"eval_samples_per_second": 14.678, |
|
"eval_wer": 0.11311265075699256, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 9.274889240506329e-05, |
|
"loss": 0.0488, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"eval_loss": 0.08931271731853485, |
|
"eval_runtime": 392.0795, |
|
"eval_samples_per_second": 13.847, |
|
"eval_wer": 0.1107005388760585, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 8.785965189873417e-05, |
|
"loss": 0.0489, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"eval_loss": 0.08885154128074646, |
|
"eval_runtime": 387.663, |
|
"eval_samples_per_second": 14.004, |
|
"eval_wer": 0.1098793944059533, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 8.297041139240506e-05, |
|
"loss": 0.0458, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"eval_loss": 0.0913764014840126, |
|
"eval_runtime": 388.0349, |
|
"eval_samples_per_second": 13.991, |
|
"eval_wer": 0.10644085193738774, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 7.808117088607595e-05, |
|
"loss": 0.0432, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 0.0885995477437973, |
|
"eval_runtime": 390.3347, |
|
"eval_samples_per_second": 13.909, |
|
"eval_wer": 0.10746728252501925, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 7.319193037974683e-05, |
|
"loss": 0.0402, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"eval_loss": 0.09285327792167664, |
|
"eval_runtime": 390.0106, |
|
"eval_samples_per_second": 13.92, |
|
"eval_wer": 0.1060816012317167, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"learning_rate": 6.830268987341772e-05, |
|
"loss": 0.043, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 8.38, |
|
"eval_loss": 0.09266022592782974, |
|
"eval_runtime": 390.9718, |
|
"eval_samples_per_second": 13.886, |
|
"eval_wer": 0.10515781370284835, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 6.34134493670886e-05, |
|
"loss": 0.041, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 0.0903313010931015, |
|
"eval_runtime": 392.2575, |
|
"eval_samples_per_second": 13.84, |
|
"eval_wer": 0.10325891711573004, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 5.8524208860759495e-05, |
|
"loss": 0.0387, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"eval_loss": 0.09009859710931778, |
|
"eval_runtime": 381.4418, |
|
"eval_samples_per_second": 14.233, |
|
"eval_wer": 0.10212984346933539, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"learning_rate": 5.365126582278481e-05, |
|
"loss": 0.0381, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"eval_loss": 0.0909804031252861, |
|
"eval_runtime": 387.8645, |
|
"eval_samples_per_second": 13.997, |
|
"eval_wer": 0.10028226841159867, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 4.87620253164557e-05, |
|
"loss": 0.0368, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"eval_loss": 0.08640129864215851, |
|
"eval_runtime": 389.071, |
|
"eval_samples_per_second": 13.954, |
|
"eval_wer": 0.09899923017705928, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 4.387278481012658e-05, |
|
"loss": 0.0393, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_loss": 0.08331574499607086, |
|
"eval_runtime": 370.5127, |
|
"eval_samples_per_second": 14.653, |
|
"eval_wer": 0.0987426225301514, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 3.898354430379747e-05, |
|
"loss": 0.0384, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"eval_loss": 0.08789246529340744, |
|
"eval_runtime": 384.7677, |
|
"eval_samples_per_second": 14.11, |
|
"eval_wer": 0.09905055170644085, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 3.409430379746835e-05, |
|
"loss": 0.0335, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"eval_loss": 0.0857023224234581, |
|
"eval_runtime": 380.952, |
|
"eval_samples_per_second": 14.251, |
|
"eval_wer": 0.09612522453169105, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"learning_rate": 2.9205063291139242e-05, |
|
"loss": 0.0356, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"eval_loss": 0.08656106889247894, |
|
"eval_runtime": 388.804, |
|
"eval_samples_per_second": 13.963, |
|
"eval_wer": 0.0953554015909674, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 2.4315822784810126e-05, |
|
"loss": 0.0354, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 0.08513284474611282, |
|
"eval_runtime": 388.1691, |
|
"eval_samples_per_second": 13.986, |
|
"eval_wer": 0.09597125994354631, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 1.9426582278481013e-05, |
|
"loss": 0.0311, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"eval_loss": 0.08604401350021362, |
|
"eval_runtime": 387.2711, |
|
"eval_samples_per_second": 14.019, |
|
"eval_wer": 0.09402104182704644, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 1.4537341772151898e-05, |
|
"loss": 0.0328, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"eval_loss": 0.08348560333251953, |
|
"eval_runtime": 387.9108, |
|
"eval_samples_per_second": 13.995, |
|
"eval_wer": 0.09412368488580959, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 9.648101265822784e-06, |
|
"loss": 0.0284, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_loss": 0.0844394788146019, |
|
"eval_runtime": 389.4719, |
|
"eval_samples_per_second": 13.939, |
|
"eval_wer": 0.09319989735694123, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 4.758860759493671e-06, |
|
"loss": 0.0305, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"eval_loss": 0.08396408706903458, |
|
"eval_runtime": 389.8127, |
|
"eval_samples_per_second": 13.927, |
|
"eval_wer": 0.09196818065178343, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 25780, |
|
"total_flos": 1.6778924993531563e+20, |
|
"train_runtime": 66940.1986, |
|
"train_samples_per_second": 0.385 |
|
} |
|
], |
|
"max_steps": 25780, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.6778924993531563e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|