|
{ |
|
"best_metric": 0.09384384384384384, |
|
"best_model_checkpoint": "fine-w2v2base-bs16-ep100-lr2e-05-linguistic-rmsnorm-focal_ctc_a0.5_g0.5-0.05_10_0.004_40/checkpoint-2550", |
|
"epoch": 100.0, |
|
"eval_steps": 50, |
|
"global_step": 5300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 7.547169811320755e-07, |
|
"loss": 1068.6505, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 527.9987182617188, |
|
"eval_runtime": 3.3391, |
|
"eval_samples_per_second": 214.726, |
|
"eval_steps_per_second": 6.888, |
|
"eval_wer": 15.883967300633968, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 2.6037735849056606e-06, |
|
"loss": 924.9194, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 365.2997741699219, |
|
"eval_runtime": 2.3955, |
|
"eval_samples_per_second": 299.317, |
|
"eval_steps_per_second": 9.602, |
|
"eval_wer": 15.72897897897898, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 4.49056603773585e-06, |
|
"loss": 268.1159, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 45.30757141113281, |
|
"eval_runtime": 2.3434, |
|
"eval_samples_per_second": 305.972, |
|
"eval_steps_per_second": 9.815, |
|
"eval_wer": 1.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 6.377358490566038e-06, |
|
"loss": 56.3914, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 42.26533508300781, |
|
"eval_runtime": 2.4101, |
|
"eval_samples_per_second": 297.501, |
|
"eval_steps_per_second": 9.543, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 8.264150943396228e-06, |
|
"loss": 54.5992, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 41.22721481323242, |
|
"eval_runtime": 2.3201, |
|
"eval_samples_per_second": 309.032, |
|
"eval_steps_per_second": 9.913, |
|
"eval_wer": 1.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 1.0150943396226416e-05, |
|
"loss": 52.7823, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 40.21255111694336, |
|
"eval_runtime": 2.2946, |
|
"eval_samples_per_second": 312.473, |
|
"eval_steps_per_second": 10.024, |
|
"eval_wer": 1.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 1.2037735849056605e-05, |
|
"loss": 51.1032, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 39.62540817260742, |
|
"eval_runtime": 2.3293, |
|
"eval_samples_per_second": 307.813, |
|
"eval_steps_per_second": 9.874, |
|
"eval_wer": 1.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1.3924528301886793e-05, |
|
"loss": 49.2081, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 38.79893493652344, |
|
"eval_runtime": 2.3075, |
|
"eval_samples_per_second": 310.729, |
|
"eval_steps_per_second": 9.968, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 1.5811320754716985e-05, |
|
"loss": 48.3538, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 38.57915115356445, |
|
"eval_runtime": 2.3577, |
|
"eval_samples_per_second": 304.109, |
|
"eval_steps_per_second": 9.755, |
|
"eval_wer": 1.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 1.7698113207547173e-05, |
|
"loss": 48.8615, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 38.462188720703125, |
|
"eval_runtime": 2.3275, |
|
"eval_samples_per_second": 308.057, |
|
"eval_steps_per_second": 9.882, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 1.9584905660377362e-05, |
|
"loss": 48.1912, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"eval_loss": 38.14221954345703, |
|
"eval_runtime": 2.241, |
|
"eval_samples_per_second": 319.943, |
|
"eval_steps_per_second": 10.263, |
|
"eval_wer": 1.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 1.9996701334124693e-05, |
|
"loss": 48.3589, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"eval_loss": 38.614410400390625, |
|
"eval_runtime": 2.3533, |
|
"eval_samples_per_second": 304.679, |
|
"eval_steps_per_second": 9.774, |
|
"eval_wer": 1.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"learning_rate": 1.9982825320106917e-05, |
|
"loss": 46.5985, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 12.26, |
|
"eval_loss": 39.63941192626953, |
|
"eval_runtime": 2.2466, |
|
"eval_samples_per_second": 319.155, |
|
"eval_steps_per_second": 10.238, |
|
"eval_wer": 1.0472972972972974, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"learning_rate": 1.995812457240187e-05, |
|
"loss": 45.5769, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 13.21, |
|
"eval_loss": 37.75796127319336, |
|
"eval_runtime": 2.3563, |
|
"eval_samples_per_second": 304.287, |
|
"eval_steps_per_second": 9.761, |
|
"eval_wer": 0.9992492492492493, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"learning_rate": 1.9922625874911624e-05, |
|
"loss": 44.1749, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 14.15, |
|
"eval_loss": 36.06916427612305, |
|
"eval_runtime": 2.294, |
|
"eval_samples_per_second": 312.554, |
|
"eval_steps_per_second": 10.026, |
|
"eval_wer": 0.9990824157490824, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 1.987636772014047e-05, |
|
"loss": 41.8932, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"eval_loss": 27.94037437438965, |
|
"eval_runtime": 2.4164, |
|
"eval_samples_per_second": 296.718, |
|
"eval_steps_per_second": 9.518, |
|
"eval_wer": 0.9315982649315983, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"learning_rate": 1.981940026745616e-05, |
|
"loss": 29.8551, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 16.04, |
|
"eval_loss": 14.121084213256836, |
|
"eval_runtime": 2.3404, |
|
"eval_samples_per_second": 306.353, |
|
"eval_steps_per_second": 9.827, |
|
"eval_wer": 0.3929763096429763, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"learning_rate": 1.9751785288700255e-05, |
|
"loss": 16.9135, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 16.98, |
|
"eval_loss": 7.982431411743164, |
|
"eval_runtime": 2.3673, |
|
"eval_samples_per_second": 302.882, |
|
"eval_steps_per_second": 9.716, |
|
"eval_wer": 0.22280613947280614, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 1.9673596101206766e-05, |
|
"loss": 11.5569, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"eval_loss": 5.807270050048828, |
|
"eval_runtime": 2.351, |
|
"eval_samples_per_second": 304.982, |
|
"eval_steps_per_second": 9.783, |
|
"eval_wer": 0.16925258591925257, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"learning_rate": 1.9584917488301524e-05, |
|
"loss": 9.1965, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 18.87, |
|
"eval_loss": 4.689126491546631, |
|
"eval_runtime": 2.2997, |
|
"eval_samples_per_second": 311.783, |
|
"eval_steps_per_second": 10.001, |
|
"eval_wer": 0.15765765765765766, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"learning_rate": 1.9485845607368606e-05, |
|
"loss": 7.6846, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 19.81, |
|
"eval_loss": 4.0937886238098145, |
|
"eval_runtime": 2.2421, |
|
"eval_samples_per_second": 319.793, |
|
"eval_steps_per_second": 10.258, |
|
"eval_wer": 0.1443943943943944, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"learning_rate": 1.937648788558344e-05, |
|
"loss": 6.6186, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 20.75, |
|
"eval_loss": 3.7074310779571533, |
|
"eval_runtime": 2.2452, |
|
"eval_samples_per_second": 319.355, |
|
"eval_steps_per_second": 10.244, |
|
"eval_wer": 0.13371705038371706, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"learning_rate": 1.925696290342571e-05, |
|
"loss": 6.1733, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 21.7, |
|
"eval_loss": 3.376906156539917, |
|
"eval_runtime": 2.3462, |
|
"eval_samples_per_second": 305.602, |
|
"eval_steps_per_second": 9.803, |
|
"eval_wer": 0.12787787787787788, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"learning_rate": 1.912740026609828e-05, |
|
"loss": 5.5833, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 22.64, |
|
"eval_loss": 3.193333864212036, |
|
"eval_runtime": 2.4175, |
|
"eval_samples_per_second": 296.587, |
|
"eval_steps_per_second": 9.514, |
|
"eval_wer": 0.12879546212879547, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"learning_rate": 1.8987940462991673e-05, |
|
"loss": 5.1097, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"eval_loss": 3.078519582748413, |
|
"eval_runtime": 2.3455, |
|
"eval_samples_per_second": 305.697, |
|
"eval_steps_per_second": 9.806, |
|
"eval_wer": 0.12320653987320654, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"learning_rate": 1.8838734715346398e-05, |
|
"loss": 4.8098, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 24.53, |
|
"eval_loss": 3.068678140640259, |
|
"eval_runtime": 2.2608, |
|
"eval_samples_per_second": 317.143, |
|
"eval_steps_per_second": 10.173, |
|
"eval_wer": 0.12103770437103771, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"learning_rate": 1.867994481227837e-05, |
|
"loss": 4.784, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 25.47, |
|
"eval_loss": 2.777139186859131, |
|
"eval_runtime": 2.4013, |
|
"eval_samples_per_second": 298.593, |
|
"eval_steps_per_second": 9.578, |
|
"eval_wer": 0.11519853186519853, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"learning_rate": 1.8511742935345198e-05, |
|
"loss": 4.3574, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 26.42, |
|
"eval_loss": 2.7346842288970947, |
|
"eval_runtime": 2.2397, |
|
"eval_samples_per_second": 320.13, |
|
"eval_steps_per_second": 10.269, |
|
"eval_wer": 0.1200367033700367, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"learning_rate": 1.8334311471843574e-05, |
|
"loss": 4.2972, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 27.36, |
|
"eval_loss": 2.685316324234009, |
|
"eval_runtime": 2.3028, |
|
"eval_samples_per_second": 311.361, |
|
"eval_steps_per_second": 9.988, |
|
"eval_wer": 0.11469803136469803, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"learning_rate": 1.814784281704023e-05, |
|
"loss": 4.1072, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 28.3, |
|
"eval_loss": 2.5679965019226074, |
|
"eval_runtime": 2.294, |
|
"eval_samples_per_second": 312.557, |
|
"eval_steps_per_second": 10.026, |
|
"eval_wer": 0.11845178511845178, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"learning_rate": 1.7952539165550863e-05, |
|
"loss": 3.9651, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"eval_loss": 2.5938329696655273, |
|
"eval_runtime": 2.4099, |
|
"eval_samples_per_second": 297.522, |
|
"eval_steps_per_second": 9.544, |
|
"eval_wer": 0.1200367033700367, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 30.19, |
|
"learning_rate": 1.7748612292093336e-05, |
|
"loss": 4.0325, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 30.19, |
|
"eval_loss": 2.5324084758758545, |
|
"eval_runtime": 2.2799, |
|
"eval_samples_per_second": 314.482, |
|
"eval_steps_per_second": 10.088, |
|
"eval_wer": 0.1180347013680347, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"learning_rate": 1.753628332185275e-05, |
|
"loss": 3.6586, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 31.13, |
|
"eval_loss": 2.584696054458618, |
|
"eval_runtime": 2.4425, |
|
"eval_samples_per_second": 293.556, |
|
"eval_steps_per_second": 9.417, |
|
"eval_wer": 0.11127794461127795, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 32.08, |
|
"learning_rate": 1.731578249070756e-05, |
|
"loss": 3.7213, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 32.08, |
|
"eval_loss": 2.5886008739471436, |
|
"eval_runtime": 2.2464, |
|
"eval_samples_per_second": 319.179, |
|
"eval_steps_per_second": 10.239, |
|
"eval_wer": 0.11161161161161161, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"learning_rate": 1.7087348895576564e-05, |
|
"loss": 3.4746, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 33.02, |
|
"eval_loss": 2.428494453430176, |
|
"eval_runtime": 2.4929, |
|
"eval_samples_per_second": 287.612, |
|
"eval_steps_per_second": 9.226, |
|
"eval_wer": 0.10051718385051718, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"learning_rate": 1.68512302351576e-05, |
|
"loss": 3.3572, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 33.96, |
|
"eval_loss": 2.4606618881225586, |
|
"eval_runtime": 2.2482, |
|
"eval_samples_per_second": 318.92, |
|
"eval_steps_per_second": 10.23, |
|
"eval_wer": 0.10727394060727394, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"learning_rate": 1.6607682541338998e-05, |
|
"loss": 3.2202, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 34.91, |
|
"eval_loss": 2.4459075927734375, |
|
"eval_runtime": 2.406, |
|
"eval_samples_per_second": 298.009, |
|
"eval_steps_per_second": 9.56, |
|
"eval_wer": 0.11027694361027694, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 35.85, |
|
"learning_rate": 1.6356969901575094e-05, |
|
"loss": 3.2437, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 35.85, |
|
"eval_loss": 2.3630285263061523, |
|
"eval_runtime": 2.2948, |
|
"eval_samples_per_second": 312.446, |
|
"eval_steps_per_second": 10.023, |
|
"eval_wer": 0.10268601935268602, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"learning_rate": 1.6099364172526732e-05, |
|
"loss": 3.1303, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 36.79, |
|
"eval_loss": 2.32814884185791, |
|
"eval_runtime": 2.3311, |
|
"eval_samples_per_second": 307.582, |
|
"eval_steps_per_second": 9.867, |
|
"eval_wer": 0.10251918585251918, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"learning_rate": 1.583514468527744e-05, |
|
"loss": 3.0037, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 37.74, |
|
"eval_loss": 2.3129348754882812, |
|
"eval_runtime": 2.4145, |
|
"eval_samples_per_second": 296.962, |
|
"eval_steps_per_second": 9.526, |
|
"eval_wer": 0.10185185185185185, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"learning_rate": 1.5564597942444743e-05, |
|
"loss": 3.0523, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 38.68, |
|
"eval_loss": 2.2962000370025635, |
|
"eval_runtime": 2.4793, |
|
"eval_samples_per_second": 289.191, |
|
"eval_steps_per_second": 9.277, |
|
"eval_wer": 0.09884884884884886, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"learning_rate": 1.5288017307515142e-05, |
|
"loss": 2.8943, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 39.62, |
|
"eval_loss": 2.323789119720459, |
|
"eval_runtime": 2.2485, |
|
"eval_samples_per_second": 318.884, |
|
"eval_steps_per_second": 10.229, |
|
"eval_wer": 0.1021021021021021, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"learning_rate": 1.500570268673965e-05, |
|
"loss": 2.8502, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 40.57, |
|
"eval_loss": 2.3549041748046875, |
|
"eval_runtime": 2.4526, |
|
"eval_samples_per_second": 292.339, |
|
"eval_steps_per_second": 9.378, |
|
"eval_wer": 0.10435435435435435, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"learning_rate": 1.4717960203934704e-05, |
|
"loss": 2.7045, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 41.51, |
|
"eval_loss": 2.367952346801758, |
|
"eval_runtime": 2.2393, |
|
"eval_samples_per_second": 320.195, |
|
"eval_steps_per_second": 10.271, |
|
"eval_wer": 0.10176843510176843, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"learning_rate": 1.4425101868541228e-05, |
|
"loss": 2.7291, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 42.45, |
|
"eval_loss": 2.4171645641326904, |
|
"eval_runtime": 2.2967, |
|
"eval_samples_per_second": 312.191, |
|
"eval_steps_per_second": 10.015, |
|
"eval_wer": 0.11286286286286286, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"learning_rate": 1.412744523730163e-05, |
|
"loss": 2.6162, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 43.4, |
|
"eval_loss": 2.321629524230957, |
|
"eval_runtime": 2.3019, |
|
"eval_samples_per_second": 311.476, |
|
"eval_steps_per_second": 9.992, |
|
"eval_wer": 0.10176843510176843, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 44.34, |
|
"learning_rate": 1.3825313069921713e-05, |
|
"loss": 2.5643, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 44.34, |
|
"eval_loss": 2.2663228511810303, |
|
"eval_runtime": 2.4493, |
|
"eval_samples_per_second": 292.741, |
|
"eval_steps_per_second": 9.391, |
|
"eval_wer": 0.09793126459793126, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"learning_rate": 1.3519032979090816e-05, |
|
"loss": 2.5842, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 45.28, |
|
"eval_loss": 2.240830898284912, |
|
"eval_runtime": 2.2616, |
|
"eval_samples_per_second": 317.033, |
|
"eval_steps_per_second": 10.17, |
|
"eval_wer": 0.0985985985985986, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"learning_rate": 1.3208937075239663e-05, |
|
"loss": 2.4498, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 46.23, |
|
"eval_loss": 2.2695207595825195, |
|
"eval_runtime": 2.3803, |
|
"eval_samples_per_second": 301.222, |
|
"eval_steps_per_second": 9.663, |
|
"eval_wer": 0.10168501835168502, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 47.17, |
|
"learning_rate": 1.289536160642119e-05, |
|
"loss": 2.4177, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 47.17, |
|
"eval_loss": 2.202859878540039, |
|
"eval_runtime": 2.2399, |
|
"eval_samples_per_second": 320.105, |
|
"eval_steps_per_second": 10.268, |
|
"eval_wer": 0.09801468134801468, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 48.11, |
|
"learning_rate": 1.2578646593704786e-05, |
|
"loss": 2.3297, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 48.11, |
|
"eval_loss": 2.225446939468384, |
|
"eval_runtime": 2.3113, |
|
"eval_samples_per_second": 310.217, |
|
"eval_steps_per_second": 9.951, |
|
"eval_wer": 0.09384384384384384, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 49.06, |
|
"learning_rate": 1.2259135462479306e-05, |
|
"loss": 2.3637, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 49.06, |
|
"eval_loss": 2.2550511360168457, |
|
"eval_runtime": 2.3163, |
|
"eval_samples_per_second": 309.542, |
|
"eval_steps_per_second": 9.93, |
|
"eval_wer": 0.1011011011011011, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 1.1937174670064665e-05, |
|
"loss": 2.2528, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 2.235013723373413, |
|
"eval_runtime": 2.297, |
|
"eval_samples_per_second": 312.152, |
|
"eval_steps_per_second": 10.013, |
|
"eval_wer": 0.10118451785118451, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 50.94, |
|
"learning_rate": 1.1613113330035816e-05, |
|
"loss": 2.2221, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 50.94, |
|
"eval_loss": 2.2253198623657227, |
|
"eval_runtime": 2.4122, |
|
"eval_samples_per_second": 297.24, |
|
"eval_steps_per_second": 9.535, |
|
"eval_wer": 0.09676343009676343, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 51.89, |
|
"learning_rate": 1.1287302833666442e-05, |
|
"loss": 2.3083, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 51.89, |
|
"eval_loss": 2.242628335952759, |
|
"eval_runtime": 2.4199, |
|
"eval_samples_per_second": 296.292, |
|
"eval_steps_per_second": 9.504, |
|
"eval_wer": 0.09584584584584585, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 52.83, |
|
"learning_rate": 1.0960096468902856e-05, |
|
"loss": 2.0585, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 52.83, |
|
"eval_loss": 2.216921806335449, |
|
"eval_runtime": 2.2934, |
|
"eval_samples_per_second": 312.636, |
|
"eval_steps_per_second": 10.029, |
|
"eval_wer": 0.09718051384718052, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 53.77, |
|
"learning_rate": 1.0631849037281267e-05, |
|
"loss": 2.2349, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 53.77, |
|
"eval_loss": 2.215132236480713, |
|
"eval_runtime": 2.3656, |
|
"eval_samples_per_second": 303.097, |
|
"eval_steps_per_second": 9.723, |
|
"eval_wer": 0.10035035035035035, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 54.72, |
|
"learning_rate": 1.0302916469203831e-05, |
|
"loss": 2.1969, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 54.72, |
|
"eval_loss": 2.2561628818511963, |
|
"eval_runtime": 2.2347, |
|
"eval_samples_per_second": 320.851, |
|
"eval_steps_per_second": 10.292, |
|
"eval_wer": 0.10235235235235235, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 55.66, |
|
"learning_rate": 9.973655437990618e-06, |
|
"loss": 2.0415, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 55.66, |
|
"eval_loss": 2.2862448692321777, |
|
"eval_runtime": 2.3242, |
|
"eval_samples_per_second": 308.491, |
|
"eval_steps_per_second": 9.896, |
|
"eval_wer": 0.10268601935268602, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"learning_rate": 9.644422973125977e-06, |
|
"loss": 2.0126, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 56.6, |
|
"eval_loss": 2.2166805267333984, |
|
"eval_runtime": 2.2438, |
|
"eval_samples_per_second": 319.545, |
|
"eval_steps_per_second": 10.25, |
|
"eval_wer": 0.10151818485151819, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 57.55, |
|
"learning_rate": 9.31557607311876e-06, |
|
"loss": 2.1, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 57.55, |
|
"eval_loss": 2.2359628677368164, |
|
"eval_runtime": 2.3078, |
|
"eval_samples_per_second": 310.684, |
|
"eval_steps_per_second": 9.966, |
|
"eval_wer": 0.10243576910243576, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 58.49, |
|
"learning_rate": 8.987471318396079e-06, |
|
"loss": 2.0739, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 58.49, |
|
"eval_loss": 2.219820976257324, |
|
"eval_runtime": 2.2533, |
|
"eval_samples_per_second": 318.193, |
|
"eval_steps_per_second": 10.207, |
|
"eval_wer": 0.10560560560560561, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 59.43, |
|
"learning_rate": 8.660464484650442e-06, |
|
"loss": 1.9875, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 59.43, |
|
"eval_loss": 2.1715681552886963, |
|
"eval_runtime": 2.3021, |
|
"eval_samples_per_second": 311.449, |
|
"eval_steps_per_second": 9.991, |
|
"eval_wer": 0.09868201534868201, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 60.38, |
|
"learning_rate": 8.334910157059459e-06, |
|
"loss": 2.0259, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 60.38, |
|
"eval_loss": 2.214280128479004, |
|
"eval_runtime": 2.2789, |
|
"eval_samples_per_second": 314.622, |
|
"eval_steps_per_second": 10.092, |
|
"eval_wer": 0.09993326659993326, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 61.32, |
|
"learning_rate": 8.011161345796495e-06, |
|
"loss": 1.8519, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 61.32, |
|
"eval_loss": 2.1836636066436768, |
|
"eval_runtime": 2.345, |
|
"eval_samples_per_second": 305.758, |
|
"eval_steps_per_second": 9.808, |
|
"eval_wer": 0.09584584584584585, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 62.26, |
|
"learning_rate": 7.689569103249139e-06, |
|
"loss": 1.9733, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 62.26, |
|
"eval_loss": 2.1865181922912598, |
|
"eval_runtime": 2.3454, |
|
"eval_samples_per_second": 305.699, |
|
"eval_steps_per_second": 9.806, |
|
"eval_wer": 0.10076743410076744, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 63.21, |
|
"learning_rate": 7.3704821433605685e-06, |
|
"loss": 1.8496, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 63.21, |
|
"eval_loss": 2.2044992446899414, |
|
"eval_runtime": 2.4785, |
|
"eval_samples_per_second": 289.29, |
|
"eval_steps_per_second": 9.28, |
|
"eval_wer": 0.10535535535535535, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 64.15, |
|
"learning_rate": 7.054246463506596e-06, |
|
"loss": 1.9354, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 64.15, |
|
"eval_loss": 2.1783363819122314, |
|
"eval_runtime": 2.2431, |
|
"eval_samples_per_second": 319.643, |
|
"eval_steps_per_second": 10.254, |
|
"eval_wer": 0.10018351685018352, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 65.09, |
|
"learning_rate": 6.741204969318343e-06, |
|
"loss": 1.8247, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 65.09, |
|
"eval_loss": 2.166977882385254, |
|
"eval_runtime": 2.2716, |
|
"eval_samples_per_second": 315.638, |
|
"eval_steps_per_second": 10.125, |
|
"eval_wer": 0.09893226559893227, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 66.04, |
|
"learning_rate": 6.43169710285745e-06, |
|
"loss": 1.8418, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 66.04, |
|
"eval_loss": 2.182258129119873, |
|
"eval_runtime": 2.4022, |
|
"eval_samples_per_second": 298.482, |
|
"eval_steps_per_second": 9.575, |
|
"eval_wer": 0.09926593259926593, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"learning_rate": 6.126058474546936e-06, |
|
"loss": 1.8259, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 66.98, |
|
"eval_loss": 2.1875205039978027, |
|
"eval_runtime": 2.3977, |
|
"eval_samples_per_second": 299.039, |
|
"eval_steps_per_second": 9.593, |
|
"eval_wer": 0.09901568234901569, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 67.92, |
|
"learning_rate": 5.82462049925683e-06, |
|
"loss": 1.8458, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 67.92, |
|
"eval_loss": 2.2048122882843018, |
|
"eval_runtime": 2.2439, |
|
"eval_samples_per_second": 319.528, |
|
"eval_steps_per_second": 10.25, |
|
"eval_wer": 0.10001668335001668, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 68.87, |
|
"learning_rate": 5.527710036939207e-06, |
|
"loss": 1.7796, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 68.87, |
|
"eval_loss": 2.201897382736206, |
|
"eval_runtime": 2.3586, |
|
"eval_samples_per_second": 303.996, |
|
"eval_steps_per_second": 9.752, |
|
"eval_wer": 0.09751418084751418, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 69.81, |
|
"learning_rate": 5.235649038202294e-06, |
|
"loss": 1.7931, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 69.81, |
|
"eval_loss": 2.1672749519348145, |
|
"eval_runtime": 2.3721, |
|
"eval_samples_per_second": 302.258, |
|
"eval_steps_per_second": 9.696, |
|
"eval_wer": 0.09551217884551218, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 70.75, |
|
"learning_rate": 4.948754195207908e-06, |
|
"loss": 1.789, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 70.75, |
|
"eval_loss": 2.1924376487731934, |
|
"eval_runtime": 2.242, |
|
"eval_samples_per_second": 319.808, |
|
"eval_steps_per_second": 10.259, |
|
"eval_wer": 0.09851518184851518, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 71.7, |
|
"learning_rate": 4.6673365982708805e-06, |
|
"loss": 1.8166, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 71.7, |
|
"eval_loss": 2.183880567550659, |
|
"eval_runtime": 2.3389, |
|
"eval_samples_per_second": 306.558, |
|
"eval_steps_per_second": 9.834, |
|
"eval_wer": 0.09642976309642977, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"learning_rate": 4.3917013985327075e-06, |
|
"loss": 1.692, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 72.64, |
|
"eval_loss": 2.1770949363708496, |
|
"eval_runtime": 2.2882, |
|
"eval_samples_per_second": 313.348, |
|
"eval_steps_per_second": 10.052, |
|
"eval_wer": 0.09501167834501167, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 73.58, |
|
"learning_rate": 4.12214747707527e-06, |
|
"loss": 1.6898, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 73.58, |
|
"eval_loss": 2.162118911743164, |
|
"eval_runtime": 2.4655, |
|
"eval_samples_per_second": 290.811, |
|
"eval_steps_per_second": 9.329, |
|
"eval_wer": 0.09442776109442776, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 74.53, |
|
"learning_rate": 3.8589671208334414e-06, |
|
"loss": 1.5916, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 74.53, |
|
"eval_loss": 2.171839714050293, |
|
"eval_runtime": 2.2934, |
|
"eval_samples_per_second": 312.641, |
|
"eval_steps_per_second": 10.029, |
|
"eval_wer": 0.09734734734734735, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 75.47, |
|
"learning_rate": 3.6024457056579186e-06, |
|
"loss": 1.7778, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 75.47, |
|
"eval_loss": 2.1617255210876465, |
|
"eval_runtime": 2.3683, |
|
"eval_samples_per_second": 302.752, |
|
"eval_steps_per_second": 9.712, |
|
"eval_wer": 0.09734734734734735, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 76.42, |
|
"learning_rate": 3.352861386871993e-06, |
|
"loss": 1.6884, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 76.42, |
|
"eval_loss": 2.1565628051757812, |
|
"eval_runtime": 2.2914, |
|
"eval_samples_per_second": 312.914, |
|
"eval_steps_per_second": 10.038, |
|
"eval_wer": 0.09818151484818151, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 77.36, |
|
"learning_rate": 3.1104847976578332e-06, |
|
"loss": 1.7182, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 77.36, |
|
"eval_loss": 2.1698944568634033, |
|
"eval_runtime": 2.4574, |
|
"eval_samples_per_second": 291.777, |
|
"eval_steps_per_second": 9.36, |
|
"eval_wer": 0.09676343009676343, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 78.3, |
|
"learning_rate": 2.8755787555992578e-06, |
|
"loss": 1.6774, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 78.3, |
|
"eval_loss": 2.184875011444092, |
|
"eval_runtime": 2.4088, |
|
"eval_samples_per_second": 297.655, |
|
"eval_steps_per_second": 9.548, |
|
"eval_wer": 0.09642976309642977, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 79.25, |
|
"learning_rate": 2.6483979776992406e-06, |
|
"loss": 1.5921, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 79.25, |
|
"eval_loss": 2.1785097122192383, |
|
"eval_runtime": 2.2929, |
|
"eval_samples_per_second": 312.709, |
|
"eval_steps_per_second": 10.031, |
|
"eval_wer": 0.09617951284617951, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 80.19, |
|
"learning_rate": 2.429188804181195e-06, |
|
"loss": 1.7108, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 80.19, |
|
"eval_loss": 2.1641573905944824, |
|
"eval_runtime": 2.4247, |
|
"eval_samples_per_second": 295.704, |
|
"eval_steps_per_second": 9.486, |
|
"eval_wer": 0.0980980980980981, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 81.13, |
|
"learning_rate": 2.2181889313734763e-06, |
|
"loss": 1.7039, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 81.13, |
|
"eval_loss": 2.18363094329834, |
|
"eval_runtime": 2.3288, |
|
"eval_samples_per_second": 307.881, |
|
"eval_steps_per_second": 9.876, |
|
"eval_wer": 0.09968301634968302, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 82.08, |
|
"learning_rate": 2.0156271539667517e-06, |
|
"loss": 1.6068, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 82.08, |
|
"eval_loss": 2.1923670768737793, |
|
"eval_runtime": 2.2451, |
|
"eval_samples_per_second": 319.365, |
|
"eval_steps_per_second": 10.245, |
|
"eval_wer": 0.10018351685018352, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 83.02, |
|
"learning_rate": 1.8217231169237837e-06, |
|
"loss": 1.6267, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 83.02, |
|
"eval_loss": 2.1807827949523926, |
|
"eval_runtime": 2.2451, |
|
"eval_samples_per_second": 319.363, |
|
"eval_steps_per_second": 10.245, |
|
"eval_wer": 0.09793126459793126, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 83.96, |
|
"learning_rate": 1.6366870773105415e-06, |
|
"loss": 1.6209, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 83.96, |
|
"eval_loss": 2.1807706356048584, |
|
"eval_runtime": 2.2844, |
|
"eval_samples_per_second": 313.863, |
|
"eval_steps_per_second": 10.068, |
|
"eval_wer": 0.09759759759759759, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 84.91, |
|
"learning_rate": 1.460719676306962e-06, |
|
"loss": 1.6989, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 84.91, |
|
"eval_loss": 2.1661221981048584, |
|
"eval_runtime": 2.4206, |
|
"eval_samples_per_second": 296.21, |
|
"eval_steps_per_second": 9.502, |
|
"eval_wer": 0.09759759759759759, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 85.85, |
|
"learning_rate": 1.294011721644568e-06, |
|
"loss": 1.6126, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 85.85, |
|
"eval_loss": 2.1738200187683105, |
|
"eval_runtime": 2.2807, |
|
"eval_samples_per_second": 314.376, |
|
"eval_steps_per_second": 10.085, |
|
"eval_wer": 0.09884884884884886, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 86.79, |
|
"learning_rate": 1.1367439807068337e-06, |
|
"loss": 1.6623, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 86.79, |
|
"eval_loss": 2.1695382595062256, |
|
"eval_runtime": 2.2977, |
|
"eval_samples_per_second": 312.057, |
|
"eval_steps_per_second": 10.01, |
|
"eval_wer": 0.09793126459793126, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 87.74, |
|
"learning_rate": 9.890869845166518e-07, |
|
"loss": 1.637, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 87.74, |
|
"eval_loss": 2.1701598167419434, |
|
"eval_runtime": 2.291, |
|
"eval_samples_per_second": 312.966, |
|
"eval_steps_per_second": 10.039, |
|
"eval_wer": 0.09893226559893227, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 88.68, |
|
"learning_rate": 8.512008428234775e-07, |
|
"loss": 1.63, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 88.68, |
|
"eval_loss": 2.163140296936035, |
|
"eval_runtime": 2.3179, |
|
"eval_samples_per_second": 309.332, |
|
"eval_steps_per_second": 9.923, |
|
"eval_wer": 0.09734734734734735, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 89.62, |
|
"learning_rate": 7.232350704906032e-07, |
|
"loss": 1.6153, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 89.62, |
|
"eval_loss": 2.165863037109375, |
|
"eval_runtime": 2.2389, |
|
"eval_samples_per_second": 320.252, |
|
"eval_steps_per_second": 10.273, |
|
"eval_wer": 0.09851518184851518, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 90.57, |
|
"learning_rate": 6.053284253708547e-07, |
|
"loss": 1.4989, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 90.57, |
|
"eval_loss": 2.169069528579712, |
|
"eval_runtime": 2.3639, |
|
"eval_samples_per_second": 303.318, |
|
"eval_steps_per_second": 9.73, |
|
"eval_wer": 0.0990990990990991, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 91.51, |
|
"learning_rate": 4.976087578465116e-07, |
|
"loss": 1.7316, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 91.51, |
|
"eval_loss": 2.1687872409820557, |
|
"eval_runtime": 2.4114, |
|
"eval_samples_per_second": 297.337, |
|
"eval_steps_per_second": 9.538, |
|
"eval_wer": 0.0985985985985986, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 92.45, |
|
"learning_rate": 4.0019287219656646e-07, |
|
"loss": 1.4623, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 92.45, |
|
"eval_loss": 2.163541316986084, |
|
"eval_runtime": 2.4367, |
|
"eval_samples_per_second": 294.251, |
|
"eval_steps_per_second": 9.439, |
|
"eval_wer": 0.09801468134801468, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"learning_rate": 3.1318639994168176e-07, |
|
"loss": 1.6932, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 93.4, |
|
"eval_loss": 2.1671087741851807, |
|
"eval_runtime": 2.3756, |
|
"eval_samples_per_second": 301.824, |
|
"eval_steps_per_second": 9.682, |
|
"eval_wer": 0.0985985985985986, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 94.34, |
|
"learning_rate": 2.366836853041621e-07, |
|
"loss": 1.5762, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 94.34, |
|
"eval_loss": 2.167776107788086, |
|
"eval_runtime": 2.3981, |
|
"eval_samples_per_second": 298.987, |
|
"eval_steps_per_second": 9.591, |
|
"eval_wer": 0.09901568234901569, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 95.28, |
|
"learning_rate": 1.7076768290714806e-07, |
|
"loss": 1.5346, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 95.28, |
|
"eval_loss": 2.1653971672058105, |
|
"eval_runtime": 2.4313, |
|
"eval_samples_per_second": 294.9, |
|
"eval_steps_per_second": 9.46, |
|
"eval_wer": 0.09843176509843177, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 96.23, |
|
"learning_rate": 1.1550986782395857e-07, |
|
"loss": 1.6015, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 96.23, |
|
"eval_loss": 2.166715383529663, |
|
"eval_runtime": 2.2602, |
|
"eval_samples_per_second": 317.23, |
|
"eval_steps_per_second": 10.176, |
|
"eval_wer": 0.0985985985985986, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 97.17, |
|
"learning_rate": 7.097015807511542e-08, |
|
"loss": 1.5609, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 97.17, |
|
"eval_loss": 2.1653175354003906, |
|
"eval_runtime": 2.2916, |
|
"eval_samples_per_second": 312.882, |
|
"eval_steps_per_second": 10.037, |
|
"eval_wer": 0.09818151484818151, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 98.11, |
|
"learning_rate": 3.71968496571018e-08, |
|
"loss": 1.6414, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 98.11, |
|
"eval_loss": 2.1648495197296143, |
|
"eval_runtime": 2.2488, |
|
"eval_samples_per_second": 318.838, |
|
"eval_steps_per_second": 10.228, |
|
"eval_wer": 0.09826493159826494, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 99.06, |
|
"learning_rate": 1.4579919805198795e-08, |
|
"loss": 1.581, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 99.06, |
|
"eval_loss": 2.166619300842285, |
|
"eval_runtime": 2.3312, |
|
"eval_samples_per_second": 307.572, |
|
"eval_steps_per_second": 9.866, |
|
"eval_wer": 0.09868201534868201, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 2.2208379630039858e-09, |
|
"loss": 1.6469, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 2.1651339530944824, |
|
"eval_runtime": 2.2476, |
|
"eval_samples_per_second": 319.01, |
|
"eval_steps_per_second": 10.233, |
|
"eval_wer": 0.09851518184851518, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 5300, |
|
"total_flos": 8.039557232590848e+16, |
|
"train_loss": 30.018039647228314, |
|
"train_runtime": 4054.7018, |
|
"train_samples_per_second": 83.459, |
|
"train_steps_per_second": 1.307 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 5300, |
|
"num_train_epochs": 100, |
|
"save_steps": 50, |
|
"total_flos": 8.039557232590848e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|