{ "best_metric": 0.09384384384384384, "best_model_checkpoint": "fine-w2v2base-bs16-ep100-lr2e-05-linguistic-rmsnorm-focal_ctc_a0.5_g0.5-0.05_10_0.004_40/checkpoint-2550", "epoch": 100.0, "eval_steps": 50, "global_step": 5300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.94, "learning_rate": 7.547169811320755e-07, "loss": 1068.6505, "step": 50 }, { "epoch": 0.94, "eval_loss": 527.9987182617188, "eval_runtime": 3.3391, "eval_samples_per_second": 214.726, "eval_steps_per_second": 6.888, "eval_wer": 15.883967300633968, "step": 50 }, { "epoch": 1.89, "learning_rate": 2.6037735849056606e-06, "loss": 924.9194, "step": 100 }, { "epoch": 1.89, "eval_loss": 365.2997741699219, "eval_runtime": 2.3955, "eval_samples_per_second": 299.317, "eval_steps_per_second": 9.602, "eval_wer": 15.72897897897898, "step": 100 }, { "epoch": 2.83, "learning_rate": 4.49056603773585e-06, "loss": 268.1159, "step": 150 }, { "epoch": 2.83, "eval_loss": 45.30757141113281, "eval_runtime": 2.3434, "eval_samples_per_second": 305.972, "eval_steps_per_second": 9.815, "eval_wer": 1.0, "step": 150 }, { "epoch": 3.77, "learning_rate": 6.377358490566038e-06, "loss": 56.3914, "step": 200 }, { "epoch": 3.77, "eval_loss": 42.26533508300781, "eval_runtime": 2.4101, "eval_samples_per_second": 297.501, "eval_steps_per_second": 9.543, "eval_wer": 1.0, "step": 200 }, { "epoch": 4.72, "learning_rate": 8.264150943396228e-06, "loss": 54.5992, "step": 250 }, { "epoch": 4.72, "eval_loss": 41.22721481323242, "eval_runtime": 2.3201, "eval_samples_per_second": 309.032, "eval_steps_per_second": 9.913, "eval_wer": 1.0, "step": 250 }, { "epoch": 5.66, "learning_rate": 1.0150943396226416e-05, "loss": 52.7823, "step": 300 }, { "epoch": 5.66, "eval_loss": 40.21255111694336, "eval_runtime": 2.2946, "eval_samples_per_second": 312.473, "eval_steps_per_second": 10.024, "eval_wer": 1.0, "step": 300 }, { "epoch": 6.6, "learning_rate": 1.2037735849056605e-05, "loss": 51.1032, "step": 350 }, { "epoch": 6.6, "eval_loss": 39.62540817260742, "eval_runtime": 2.3293, "eval_samples_per_second": 307.813, "eval_steps_per_second": 9.874, "eval_wer": 1.0, "step": 350 }, { "epoch": 7.55, "learning_rate": 1.3924528301886793e-05, "loss": 49.2081, "step": 400 }, { "epoch": 7.55, "eval_loss": 38.79893493652344, "eval_runtime": 2.3075, "eval_samples_per_second": 310.729, "eval_steps_per_second": 9.968, "eval_wer": 1.0, "step": 400 }, { "epoch": 8.49, "learning_rate": 1.5811320754716985e-05, "loss": 48.3538, "step": 450 }, { "epoch": 8.49, "eval_loss": 38.57915115356445, "eval_runtime": 2.3577, "eval_samples_per_second": 304.109, "eval_steps_per_second": 9.755, "eval_wer": 1.0, "step": 450 }, { "epoch": 9.43, "learning_rate": 1.7698113207547173e-05, "loss": 48.8615, "step": 500 }, { "epoch": 9.43, "eval_loss": 38.462188720703125, "eval_runtime": 2.3275, "eval_samples_per_second": 308.057, "eval_steps_per_second": 9.882, "eval_wer": 1.0, "step": 500 }, { "epoch": 10.38, "learning_rate": 1.9584905660377362e-05, "loss": 48.1912, "step": 550 }, { "epoch": 10.38, "eval_loss": 38.14221954345703, "eval_runtime": 2.241, "eval_samples_per_second": 319.943, "eval_steps_per_second": 10.263, "eval_wer": 1.0, "step": 550 }, { "epoch": 11.32, "learning_rate": 1.9996701334124693e-05, "loss": 48.3589, "step": 600 }, { "epoch": 11.32, "eval_loss": 38.614410400390625, "eval_runtime": 2.3533, "eval_samples_per_second": 304.679, "eval_steps_per_second": 9.774, "eval_wer": 1.0, "step": 600 }, { "epoch": 12.26, "learning_rate": 1.9982825320106917e-05, "loss": 46.5985, "step": 650 }, { "epoch": 12.26, "eval_loss": 39.63941192626953, "eval_runtime": 2.2466, "eval_samples_per_second": 319.155, "eval_steps_per_second": 10.238, "eval_wer": 1.0472972972972974, "step": 650 }, { "epoch": 13.21, "learning_rate": 1.995812457240187e-05, "loss": 45.5769, "step": 700 }, { "epoch": 13.21, "eval_loss": 37.75796127319336, "eval_runtime": 2.3563, "eval_samples_per_second": 304.287, "eval_steps_per_second": 9.761, "eval_wer": 0.9992492492492493, "step": 700 }, { "epoch": 14.15, "learning_rate": 1.9922625874911624e-05, "loss": 44.1749, "step": 750 }, { "epoch": 14.15, "eval_loss": 36.06916427612305, "eval_runtime": 2.294, "eval_samples_per_second": 312.554, "eval_steps_per_second": 10.026, "eval_wer": 0.9990824157490824, "step": 750 }, { "epoch": 15.09, "learning_rate": 1.987636772014047e-05, "loss": 41.8932, "step": 800 }, { "epoch": 15.09, "eval_loss": 27.94037437438965, "eval_runtime": 2.4164, "eval_samples_per_second": 296.718, "eval_steps_per_second": 9.518, "eval_wer": 0.9315982649315983, "step": 800 }, { "epoch": 16.04, "learning_rate": 1.981940026745616e-05, "loss": 29.8551, "step": 850 }, { "epoch": 16.04, "eval_loss": 14.121084213256836, "eval_runtime": 2.3404, "eval_samples_per_second": 306.353, "eval_steps_per_second": 9.827, "eval_wer": 0.3929763096429763, "step": 850 }, { "epoch": 16.98, "learning_rate": 1.9751785288700255e-05, "loss": 16.9135, "step": 900 }, { "epoch": 16.98, "eval_loss": 7.982431411743164, "eval_runtime": 2.3673, "eval_samples_per_second": 302.882, "eval_steps_per_second": 9.716, "eval_wer": 0.22280613947280614, "step": 900 }, { "epoch": 17.92, "learning_rate": 1.9673596101206766e-05, "loss": 11.5569, "step": 950 }, { "epoch": 17.92, "eval_loss": 5.807270050048828, "eval_runtime": 2.351, "eval_samples_per_second": 304.982, "eval_steps_per_second": 9.783, "eval_wer": 0.16925258591925257, "step": 950 }, { "epoch": 18.87, "learning_rate": 1.9584917488301524e-05, "loss": 9.1965, "step": 1000 }, { "epoch": 18.87, "eval_loss": 4.689126491546631, "eval_runtime": 2.2997, "eval_samples_per_second": 311.783, "eval_steps_per_second": 10.001, "eval_wer": 0.15765765765765766, "step": 1000 }, { "epoch": 19.81, "learning_rate": 1.9485845607368606e-05, "loss": 7.6846, "step": 1050 }, { "epoch": 19.81, "eval_loss": 4.0937886238098145, "eval_runtime": 2.2421, "eval_samples_per_second": 319.793, "eval_steps_per_second": 10.258, "eval_wer": 0.1443943943943944, "step": 1050 }, { "epoch": 20.75, "learning_rate": 1.937648788558344e-05, "loss": 6.6186, "step": 1100 }, { "epoch": 20.75, "eval_loss": 3.7074310779571533, "eval_runtime": 2.2452, "eval_samples_per_second": 319.355, "eval_steps_per_second": 10.244, "eval_wer": 0.13371705038371706, "step": 1100 }, { "epoch": 21.7, "learning_rate": 1.925696290342571e-05, "loss": 6.1733, "step": 1150 }, { "epoch": 21.7, "eval_loss": 3.376906156539917, "eval_runtime": 2.3462, "eval_samples_per_second": 305.602, "eval_steps_per_second": 9.803, "eval_wer": 0.12787787787787788, "step": 1150 }, { "epoch": 22.64, "learning_rate": 1.912740026609828e-05, "loss": 5.5833, "step": 1200 }, { "epoch": 22.64, "eval_loss": 3.193333864212036, "eval_runtime": 2.4175, "eval_samples_per_second": 296.587, "eval_steps_per_second": 9.514, "eval_wer": 0.12879546212879547, "step": 1200 }, { "epoch": 23.58, "learning_rate": 1.8987940462991673e-05, "loss": 5.1097, "step": 1250 }, { "epoch": 23.58, "eval_loss": 3.078519582748413, "eval_runtime": 2.3455, "eval_samples_per_second": 305.697, "eval_steps_per_second": 9.806, "eval_wer": 0.12320653987320654, "step": 1250 }, { "epoch": 24.53, "learning_rate": 1.8838734715346398e-05, "loss": 4.8098, "step": 1300 }, { "epoch": 24.53, "eval_loss": 3.068678140640259, "eval_runtime": 2.2608, "eval_samples_per_second": 317.143, "eval_steps_per_second": 10.173, "eval_wer": 0.12103770437103771, "step": 1300 }, { "epoch": 25.47, "learning_rate": 1.867994481227837e-05, "loss": 4.784, "step": 1350 }, { "epoch": 25.47, "eval_loss": 2.777139186859131, "eval_runtime": 2.4013, "eval_samples_per_second": 298.593, "eval_steps_per_second": 9.578, "eval_wer": 0.11519853186519853, "step": 1350 }, { "epoch": 26.42, "learning_rate": 1.8511742935345198e-05, "loss": 4.3574, "step": 1400 }, { "epoch": 26.42, "eval_loss": 2.7346842288970947, "eval_runtime": 2.2397, "eval_samples_per_second": 320.13, "eval_steps_per_second": 10.269, "eval_wer": 0.1200367033700367, "step": 1400 }, { "epoch": 27.36, "learning_rate": 1.8334311471843574e-05, "loss": 4.2972, "step": 1450 }, { "epoch": 27.36, "eval_loss": 2.685316324234009, "eval_runtime": 2.3028, "eval_samples_per_second": 311.361, "eval_steps_per_second": 9.988, "eval_wer": 0.11469803136469803, "step": 1450 }, { "epoch": 28.3, "learning_rate": 1.814784281704023e-05, "loss": 4.1072, "step": 1500 }, { "epoch": 28.3, "eval_loss": 2.5679965019226074, "eval_runtime": 2.294, "eval_samples_per_second": 312.557, "eval_steps_per_second": 10.026, "eval_wer": 0.11845178511845178, "step": 1500 }, { "epoch": 29.25, "learning_rate": 1.7952539165550863e-05, "loss": 3.9651, "step": 1550 }, { "epoch": 29.25, "eval_loss": 2.5938329696655273, "eval_runtime": 2.4099, "eval_samples_per_second": 297.522, "eval_steps_per_second": 9.544, "eval_wer": 0.1200367033700367, "step": 1550 }, { "epoch": 30.19, "learning_rate": 1.7748612292093336e-05, "loss": 4.0325, "step": 1600 }, { "epoch": 30.19, "eval_loss": 2.5324084758758545, "eval_runtime": 2.2799, "eval_samples_per_second": 314.482, "eval_steps_per_second": 10.088, "eval_wer": 0.1180347013680347, "step": 1600 }, { "epoch": 31.13, "learning_rate": 1.753628332185275e-05, "loss": 3.6586, "step": 1650 }, { "epoch": 31.13, "eval_loss": 2.584696054458618, "eval_runtime": 2.4425, "eval_samples_per_second": 293.556, "eval_steps_per_second": 9.417, "eval_wer": 0.11127794461127795, "step": 1650 }, { "epoch": 32.08, "learning_rate": 1.731578249070756e-05, "loss": 3.7213, "step": 1700 }, { "epoch": 32.08, "eval_loss": 2.5886008739471436, "eval_runtime": 2.2464, "eval_samples_per_second": 319.179, "eval_steps_per_second": 10.239, "eval_wer": 0.11161161161161161, "step": 1700 }, { "epoch": 33.02, "learning_rate": 1.7087348895576564e-05, "loss": 3.4746, "step": 1750 }, { "epoch": 33.02, "eval_loss": 2.428494453430176, "eval_runtime": 2.4929, "eval_samples_per_second": 287.612, "eval_steps_per_second": 9.226, "eval_wer": 0.10051718385051718, "step": 1750 }, { "epoch": 33.96, "learning_rate": 1.68512302351576e-05, "loss": 3.3572, "step": 1800 }, { "epoch": 33.96, "eval_loss": 2.4606618881225586, "eval_runtime": 2.2482, "eval_samples_per_second": 318.92, "eval_steps_per_second": 10.23, "eval_wer": 0.10727394060727394, "step": 1800 }, { "epoch": 34.91, "learning_rate": 1.6607682541338998e-05, "loss": 3.2202, "step": 1850 }, { "epoch": 34.91, "eval_loss": 2.4459075927734375, "eval_runtime": 2.406, "eval_samples_per_second": 298.009, "eval_steps_per_second": 9.56, "eval_wer": 0.11027694361027694, "step": 1850 }, { "epoch": 35.85, "learning_rate": 1.6356969901575094e-05, "loss": 3.2437, "step": 1900 }, { "epoch": 35.85, "eval_loss": 2.3630285263061523, "eval_runtime": 2.2948, "eval_samples_per_second": 312.446, "eval_steps_per_second": 10.023, "eval_wer": 0.10268601935268602, "step": 1900 }, { "epoch": 36.79, "learning_rate": 1.6099364172526732e-05, "loss": 3.1303, "step": 1950 }, { "epoch": 36.79, "eval_loss": 2.32814884185791, "eval_runtime": 2.3311, "eval_samples_per_second": 307.582, "eval_steps_per_second": 9.867, "eval_wer": 0.10251918585251918, "step": 1950 }, { "epoch": 37.74, "learning_rate": 1.583514468527744e-05, "loss": 3.0037, "step": 2000 }, { "epoch": 37.74, "eval_loss": 2.3129348754882812, "eval_runtime": 2.4145, "eval_samples_per_second": 296.962, "eval_steps_per_second": 9.526, "eval_wer": 0.10185185185185185, "step": 2000 }, { "epoch": 38.68, "learning_rate": 1.5564597942444743e-05, "loss": 3.0523, "step": 2050 }, { "epoch": 38.68, "eval_loss": 2.2962000370025635, "eval_runtime": 2.4793, "eval_samples_per_second": 289.191, "eval_steps_per_second": 9.277, "eval_wer": 0.09884884884884886, "step": 2050 }, { "epoch": 39.62, "learning_rate": 1.5288017307515142e-05, "loss": 2.8943, "step": 2100 }, { "epoch": 39.62, "eval_loss": 2.323789119720459, "eval_runtime": 2.2485, "eval_samples_per_second": 318.884, "eval_steps_per_second": 10.229, "eval_wer": 0.1021021021021021, "step": 2100 }, { "epoch": 40.57, "learning_rate": 1.500570268673965e-05, "loss": 2.8502, "step": 2150 }, { "epoch": 40.57, "eval_loss": 2.3549041748046875, "eval_runtime": 2.4526, "eval_samples_per_second": 292.339, "eval_steps_per_second": 9.378, "eval_wer": 0.10435435435435435, "step": 2150 }, { "epoch": 41.51, "learning_rate": 1.4717960203934704e-05, "loss": 2.7045, "step": 2200 }, { "epoch": 41.51, "eval_loss": 2.367952346801758, "eval_runtime": 2.2393, "eval_samples_per_second": 320.195, "eval_steps_per_second": 10.271, "eval_wer": 0.10176843510176843, "step": 2200 }, { "epoch": 42.45, "learning_rate": 1.4425101868541228e-05, "loss": 2.7291, "step": 2250 }, { "epoch": 42.45, "eval_loss": 2.4171645641326904, "eval_runtime": 2.2967, "eval_samples_per_second": 312.191, "eval_steps_per_second": 10.015, "eval_wer": 0.11286286286286286, "step": 2250 }, { "epoch": 43.4, "learning_rate": 1.412744523730163e-05, "loss": 2.6162, "step": 2300 }, { "epoch": 43.4, "eval_loss": 2.321629524230957, "eval_runtime": 2.3019, "eval_samples_per_second": 311.476, "eval_steps_per_second": 9.992, "eval_wer": 0.10176843510176843, "step": 2300 }, { "epoch": 44.34, "learning_rate": 1.3825313069921713e-05, "loss": 2.5643, "step": 2350 }, { "epoch": 44.34, "eval_loss": 2.2663228511810303, "eval_runtime": 2.4493, "eval_samples_per_second": 292.741, "eval_steps_per_second": 9.391, "eval_wer": 0.09793126459793126, "step": 2350 }, { "epoch": 45.28, "learning_rate": 1.3519032979090816e-05, "loss": 2.5842, "step": 2400 }, { "epoch": 45.28, "eval_loss": 2.240830898284912, "eval_runtime": 2.2616, "eval_samples_per_second": 317.033, "eval_steps_per_second": 10.17, "eval_wer": 0.0985985985985986, "step": 2400 }, { "epoch": 46.23, "learning_rate": 1.3208937075239663e-05, "loss": 2.4498, "step": 2450 }, { "epoch": 46.23, "eval_loss": 2.2695207595825195, "eval_runtime": 2.3803, "eval_samples_per_second": 301.222, "eval_steps_per_second": 9.663, "eval_wer": 0.10168501835168502, "step": 2450 }, { "epoch": 47.17, "learning_rate": 1.289536160642119e-05, "loss": 2.4177, "step": 2500 }, { "epoch": 47.17, "eval_loss": 2.202859878540039, "eval_runtime": 2.2399, "eval_samples_per_second": 320.105, "eval_steps_per_second": 10.268, "eval_wer": 0.09801468134801468, "step": 2500 }, { "epoch": 48.11, "learning_rate": 1.2578646593704786e-05, "loss": 2.3297, "step": 2550 }, { "epoch": 48.11, "eval_loss": 2.225446939468384, "eval_runtime": 2.3113, "eval_samples_per_second": 310.217, "eval_steps_per_second": 9.951, "eval_wer": 0.09384384384384384, "step": 2550 }, { "epoch": 49.06, "learning_rate": 1.2259135462479306e-05, "loss": 2.3637, "step": 2600 }, { "epoch": 49.06, "eval_loss": 2.2550511360168457, "eval_runtime": 2.3163, "eval_samples_per_second": 309.542, "eval_steps_per_second": 9.93, "eval_wer": 0.1011011011011011, "step": 2600 }, { "epoch": 50.0, "learning_rate": 1.1937174670064665e-05, "loss": 2.2528, "step": 2650 }, { "epoch": 50.0, "eval_loss": 2.235013723373413, "eval_runtime": 2.297, "eval_samples_per_second": 312.152, "eval_steps_per_second": 10.013, "eval_wer": 0.10118451785118451, "step": 2650 }, { "epoch": 50.94, "learning_rate": 1.1613113330035816e-05, "loss": 2.2221, "step": 2700 }, { "epoch": 50.94, "eval_loss": 2.2253198623657227, "eval_runtime": 2.4122, "eval_samples_per_second": 297.24, "eval_steps_per_second": 9.535, "eval_wer": 0.09676343009676343, "step": 2700 }, { "epoch": 51.89, "learning_rate": 1.1287302833666442e-05, "loss": 2.3083, "step": 2750 }, { "epoch": 51.89, "eval_loss": 2.242628335952759, "eval_runtime": 2.4199, "eval_samples_per_second": 296.292, "eval_steps_per_second": 9.504, "eval_wer": 0.09584584584584585, "step": 2750 }, { "epoch": 52.83, "learning_rate": 1.0960096468902856e-05, "loss": 2.0585, "step": 2800 }, { "epoch": 52.83, "eval_loss": 2.216921806335449, "eval_runtime": 2.2934, "eval_samples_per_second": 312.636, "eval_steps_per_second": 10.029, "eval_wer": 0.09718051384718052, "step": 2800 }, { "epoch": 53.77, "learning_rate": 1.0631849037281267e-05, "loss": 2.2349, "step": 2850 }, { "epoch": 53.77, "eval_loss": 2.215132236480713, "eval_runtime": 2.3656, "eval_samples_per_second": 303.097, "eval_steps_per_second": 9.723, "eval_wer": 0.10035035035035035, "step": 2850 }, { "epoch": 54.72, "learning_rate": 1.0302916469203831e-05, "loss": 2.1969, "step": 2900 }, { "epoch": 54.72, "eval_loss": 2.2561628818511963, "eval_runtime": 2.2347, "eval_samples_per_second": 320.851, "eval_steps_per_second": 10.292, "eval_wer": 0.10235235235235235, "step": 2900 }, { "epoch": 55.66, "learning_rate": 9.973655437990618e-06, "loss": 2.0415, "step": 2950 }, { "epoch": 55.66, "eval_loss": 2.2862448692321777, "eval_runtime": 2.3242, "eval_samples_per_second": 308.491, "eval_steps_per_second": 9.896, "eval_wer": 0.10268601935268602, "step": 2950 }, { "epoch": 56.6, "learning_rate": 9.644422973125977e-06, "loss": 2.0126, "step": 3000 }, { "epoch": 56.6, "eval_loss": 2.2166805267333984, "eval_runtime": 2.2438, "eval_samples_per_second": 319.545, "eval_steps_per_second": 10.25, "eval_wer": 0.10151818485151819, "step": 3000 }, { "epoch": 57.55, "learning_rate": 9.31557607311876e-06, "loss": 2.1, "step": 3050 }, { "epoch": 57.55, "eval_loss": 2.2359628677368164, "eval_runtime": 2.3078, "eval_samples_per_second": 310.684, "eval_steps_per_second": 9.966, "eval_wer": 0.10243576910243576, "step": 3050 }, { "epoch": 58.49, "learning_rate": 8.987471318396079e-06, "loss": 2.0739, "step": 3100 }, { "epoch": 58.49, "eval_loss": 2.219820976257324, "eval_runtime": 2.2533, "eval_samples_per_second": 318.193, "eval_steps_per_second": 10.207, "eval_wer": 0.10560560560560561, "step": 3100 }, { "epoch": 59.43, "learning_rate": 8.660464484650442e-06, "loss": 1.9875, "step": 3150 }, { "epoch": 59.43, "eval_loss": 2.1715681552886963, "eval_runtime": 2.3021, "eval_samples_per_second": 311.449, "eval_steps_per_second": 9.991, "eval_wer": 0.09868201534868201, "step": 3150 }, { "epoch": 60.38, "learning_rate": 8.334910157059459e-06, "loss": 2.0259, "step": 3200 }, { "epoch": 60.38, "eval_loss": 2.214280128479004, "eval_runtime": 2.2789, "eval_samples_per_second": 314.622, "eval_steps_per_second": 10.092, "eval_wer": 0.09993326659993326, "step": 3200 }, { "epoch": 61.32, "learning_rate": 8.011161345796495e-06, "loss": 1.8519, "step": 3250 }, { "epoch": 61.32, "eval_loss": 2.1836636066436768, "eval_runtime": 2.345, "eval_samples_per_second": 305.758, "eval_steps_per_second": 9.808, "eval_wer": 0.09584584584584585, "step": 3250 }, { "epoch": 62.26, "learning_rate": 7.689569103249139e-06, "loss": 1.9733, "step": 3300 }, { "epoch": 62.26, "eval_loss": 2.1865181922912598, "eval_runtime": 2.3454, "eval_samples_per_second": 305.699, "eval_steps_per_second": 9.806, "eval_wer": 0.10076743410076744, "step": 3300 }, { "epoch": 63.21, "learning_rate": 7.3704821433605685e-06, "loss": 1.8496, "step": 3350 }, { "epoch": 63.21, "eval_loss": 2.2044992446899414, "eval_runtime": 2.4785, "eval_samples_per_second": 289.29, "eval_steps_per_second": 9.28, "eval_wer": 0.10535535535535535, "step": 3350 }, { "epoch": 64.15, "learning_rate": 7.054246463506596e-06, "loss": 1.9354, "step": 3400 }, { "epoch": 64.15, "eval_loss": 2.1783363819122314, "eval_runtime": 2.2431, "eval_samples_per_second": 319.643, "eval_steps_per_second": 10.254, "eval_wer": 0.10018351685018352, "step": 3400 }, { "epoch": 65.09, "learning_rate": 6.741204969318343e-06, "loss": 1.8247, "step": 3450 }, { "epoch": 65.09, "eval_loss": 2.166977882385254, "eval_runtime": 2.2716, "eval_samples_per_second": 315.638, "eval_steps_per_second": 10.125, "eval_wer": 0.09893226559893227, "step": 3450 }, { "epoch": 66.04, "learning_rate": 6.43169710285745e-06, "loss": 1.8418, "step": 3500 }, { "epoch": 66.04, "eval_loss": 2.182258129119873, "eval_runtime": 2.4022, "eval_samples_per_second": 298.482, "eval_steps_per_second": 9.575, "eval_wer": 0.09926593259926593, "step": 3500 }, { "epoch": 66.98, "learning_rate": 6.126058474546936e-06, "loss": 1.8259, "step": 3550 }, { "epoch": 66.98, "eval_loss": 2.1875205039978027, "eval_runtime": 2.3977, "eval_samples_per_second": 299.039, "eval_steps_per_second": 9.593, "eval_wer": 0.09901568234901569, "step": 3550 }, { "epoch": 67.92, "learning_rate": 5.82462049925683e-06, "loss": 1.8458, "step": 3600 }, { "epoch": 67.92, "eval_loss": 2.2048122882843018, "eval_runtime": 2.2439, "eval_samples_per_second": 319.528, "eval_steps_per_second": 10.25, "eval_wer": 0.10001668335001668, "step": 3600 }, { "epoch": 68.87, "learning_rate": 5.527710036939207e-06, "loss": 1.7796, "step": 3650 }, { "epoch": 68.87, "eval_loss": 2.201897382736206, "eval_runtime": 2.3586, "eval_samples_per_second": 303.996, "eval_steps_per_second": 9.752, "eval_wer": 0.09751418084751418, "step": 3650 }, { "epoch": 69.81, "learning_rate": 5.235649038202294e-06, "loss": 1.7931, "step": 3700 }, { "epoch": 69.81, "eval_loss": 2.1672749519348145, "eval_runtime": 2.3721, "eval_samples_per_second": 302.258, "eval_steps_per_second": 9.696, "eval_wer": 0.09551217884551218, "step": 3700 }, { "epoch": 70.75, "learning_rate": 4.948754195207908e-06, "loss": 1.789, "step": 3750 }, { "epoch": 70.75, "eval_loss": 2.1924376487731934, "eval_runtime": 2.242, "eval_samples_per_second": 319.808, "eval_steps_per_second": 10.259, "eval_wer": 0.09851518184851518, "step": 3750 }, { "epoch": 71.7, "learning_rate": 4.6673365982708805e-06, "loss": 1.8166, "step": 3800 }, { "epoch": 71.7, "eval_loss": 2.183880567550659, "eval_runtime": 2.3389, "eval_samples_per_second": 306.558, "eval_steps_per_second": 9.834, "eval_wer": 0.09642976309642977, "step": 3800 }, { "epoch": 72.64, "learning_rate": 4.3917013985327075e-06, "loss": 1.692, "step": 3850 }, { "epoch": 72.64, "eval_loss": 2.1770949363708496, "eval_runtime": 2.2882, "eval_samples_per_second": 313.348, "eval_steps_per_second": 10.052, "eval_wer": 0.09501167834501167, "step": 3850 }, { "epoch": 73.58, "learning_rate": 4.12214747707527e-06, "loss": 1.6898, "step": 3900 }, { "epoch": 73.58, "eval_loss": 2.162118911743164, "eval_runtime": 2.4655, "eval_samples_per_second": 290.811, "eval_steps_per_second": 9.329, "eval_wer": 0.09442776109442776, "step": 3900 }, { "epoch": 74.53, "learning_rate": 3.8589671208334414e-06, "loss": 1.5916, "step": 3950 }, { "epoch": 74.53, "eval_loss": 2.171839714050293, "eval_runtime": 2.2934, "eval_samples_per_second": 312.641, "eval_steps_per_second": 10.029, "eval_wer": 0.09734734734734735, "step": 3950 }, { "epoch": 75.47, "learning_rate": 3.6024457056579186e-06, "loss": 1.7778, "step": 4000 }, { "epoch": 75.47, "eval_loss": 2.1617255210876465, "eval_runtime": 2.3683, "eval_samples_per_second": 302.752, "eval_steps_per_second": 9.712, "eval_wer": 0.09734734734734735, "step": 4000 }, { "epoch": 76.42, "learning_rate": 3.352861386871993e-06, "loss": 1.6884, "step": 4050 }, { "epoch": 76.42, "eval_loss": 2.1565628051757812, "eval_runtime": 2.2914, "eval_samples_per_second": 312.914, "eval_steps_per_second": 10.038, "eval_wer": 0.09818151484818151, "step": 4050 }, { "epoch": 77.36, "learning_rate": 3.1104847976578332e-06, "loss": 1.7182, "step": 4100 }, { "epoch": 77.36, "eval_loss": 2.1698944568634033, "eval_runtime": 2.4574, "eval_samples_per_second": 291.777, "eval_steps_per_second": 9.36, "eval_wer": 0.09676343009676343, "step": 4100 }, { "epoch": 78.3, "learning_rate": 2.8755787555992578e-06, "loss": 1.6774, "step": 4150 }, { "epoch": 78.3, "eval_loss": 2.184875011444092, "eval_runtime": 2.4088, "eval_samples_per_second": 297.655, "eval_steps_per_second": 9.548, "eval_wer": 0.09642976309642977, "step": 4150 }, { "epoch": 79.25, "learning_rate": 2.6483979776992406e-06, "loss": 1.5921, "step": 4200 }, { "epoch": 79.25, "eval_loss": 2.1785097122192383, "eval_runtime": 2.2929, "eval_samples_per_second": 312.709, "eval_steps_per_second": 10.031, "eval_wer": 0.09617951284617951, "step": 4200 }, { "epoch": 80.19, "learning_rate": 2.429188804181195e-06, "loss": 1.7108, "step": 4250 }, { "epoch": 80.19, "eval_loss": 2.1641573905944824, "eval_runtime": 2.4247, "eval_samples_per_second": 295.704, "eval_steps_per_second": 9.486, "eval_wer": 0.0980980980980981, "step": 4250 }, { "epoch": 81.13, "learning_rate": 2.2181889313734763e-06, "loss": 1.7039, "step": 4300 }, { "epoch": 81.13, "eval_loss": 2.18363094329834, "eval_runtime": 2.3288, "eval_samples_per_second": 307.881, "eval_steps_per_second": 9.876, "eval_wer": 0.09968301634968302, "step": 4300 }, { "epoch": 82.08, "learning_rate": 2.0156271539667517e-06, "loss": 1.6068, "step": 4350 }, { "epoch": 82.08, "eval_loss": 2.1923670768737793, "eval_runtime": 2.2451, "eval_samples_per_second": 319.365, "eval_steps_per_second": 10.245, "eval_wer": 0.10018351685018352, "step": 4350 }, { "epoch": 83.02, "learning_rate": 1.8217231169237837e-06, "loss": 1.6267, "step": 4400 }, { "epoch": 83.02, "eval_loss": 2.1807827949523926, "eval_runtime": 2.2451, "eval_samples_per_second": 319.363, "eval_steps_per_second": 10.245, "eval_wer": 0.09793126459793126, "step": 4400 }, { "epoch": 83.96, "learning_rate": 1.6366870773105415e-06, "loss": 1.6209, "step": 4450 }, { "epoch": 83.96, "eval_loss": 2.1807706356048584, "eval_runtime": 2.2844, "eval_samples_per_second": 313.863, "eval_steps_per_second": 10.068, "eval_wer": 0.09759759759759759, "step": 4450 }, { "epoch": 84.91, "learning_rate": 1.460719676306962e-06, "loss": 1.6989, "step": 4500 }, { "epoch": 84.91, "eval_loss": 2.1661221981048584, "eval_runtime": 2.4206, "eval_samples_per_second": 296.21, "eval_steps_per_second": 9.502, "eval_wer": 0.09759759759759759, "step": 4500 }, { "epoch": 85.85, "learning_rate": 1.294011721644568e-06, "loss": 1.6126, "step": 4550 }, { "epoch": 85.85, "eval_loss": 2.1738200187683105, "eval_runtime": 2.2807, "eval_samples_per_second": 314.376, "eval_steps_per_second": 10.085, "eval_wer": 0.09884884884884886, "step": 4550 }, { "epoch": 86.79, "learning_rate": 1.1367439807068337e-06, "loss": 1.6623, "step": 4600 }, { "epoch": 86.79, "eval_loss": 2.1695382595062256, "eval_runtime": 2.2977, "eval_samples_per_second": 312.057, "eval_steps_per_second": 10.01, "eval_wer": 0.09793126459793126, "step": 4600 }, { "epoch": 87.74, "learning_rate": 9.890869845166518e-07, "loss": 1.637, "step": 4650 }, { "epoch": 87.74, "eval_loss": 2.1701598167419434, "eval_runtime": 2.291, "eval_samples_per_second": 312.966, "eval_steps_per_second": 10.039, "eval_wer": 0.09893226559893227, "step": 4650 }, { "epoch": 88.68, "learning_rate": 8.512008428234775e-07, "loss": 1.63, "step": 4700 }, { "epoch": 88.68, "eval_loss": 2.163140296936035, "eval_runtime": 2.3179, "eval_samples_per_second": 309.332, "eval_steps_per_second": 9.923, "eval_wer": 0.09734734734734735, "step": 4700 }, { "epoch": 89.62, "learning_rate": 7.232350704906032e-07, "loss": 1.6153, "step": 4750 }, { "epoch": 89.62, "eval_loss": 2.165863037109375, "eval_runtime": 2.2389, "eval_samples_per_second": 320.252, "eval_steps_per_second": 10.273, "eval_wer": 0.09851518184851518, "step": 4750 }, { "epoch": 90.57, "learning_rate": 6.053284253708547e-07, "loss": 1.4989, "step": 4800 }, { "epoch": 90.57, "eval_loss": 2.169069528579712, "eval_runtime": 2.3639, "eval_samples_per_second": 303.318, "eval_steps_per_second": 9.73, "eval_wer": 0.0990990990990991, "step": 4800 }, { "epoch": 91.51, "learning_rate": 4.976087578465116e-07, "loss": 1.7316, "step": 4850 }, { "epoch": 91.51, "eval_loss": 2.1687872409820557, "eval_runtime": 2.4114, "eval_samples_per_second": 297.337, "eval_steps_per_second": 9.538, "eval_wer": 0.0985985985985986, "step": 4850 }, { "epoch": 92.45, "learning_rate": 4.0019287219656646e-07, "loss": 1.4623, "step": 4900 }, { "epoch": 92.45, "eval_loss": 2.163541316986084, "eval_runtime": 2.4367, "eval_samples_per_second": 294.251, "eval_steps_per_second": 9.439, "eval_wer": 0.09801468134801468, "step": 4900 }, { "epoch": 93.4, "learning_rate": 3.1318639994168176e-07, "loss": 1.6932, "step": 4950 }, { "epoch": 93.4, "eval_loss": 2.1671087741851807, "eval_runtime": 2.3756, "eval_samples_per_second": 301.824, "eval_steps_per_second": 9.682, "eval_wer": 0.0985985985985986, "step": 4950 }, { "epoch": 94.34, "learning_rate": 2.366836853041621e-07, "loss": 1.5762, "step": 5000 }, { "epoch": 94.34, "eval_loss": 2.167776107788086, "eval_runtime": 2.3981, "eval_samples_per_second": 298.987, "eval_steps_per_second": 9.591, "eval_wer": 0.09901568234901569, "step": 5000 }, { "epoch": 95.28, "learning_rate": 1.7076768290714806e-07, "loss": 1.5346, "step": 5050 }, { "epoch": 95.28, "eval_loss": 2.1653971672058105, "eval_runtime": 2.4313, "eval_samples_per_second": 294.9, "eval_steps_per_second": 9.46, "eval_wer": 0.09843176509843177, "step": 5050 }, { "epoch": 96.23, "learning_rate": 1.1550986782395857e-07, "loss": 1.6015, "step": 5100 }, { "epoch": 96.23, "eval_loss": 2.166715383529663, "eval_runtime": 2.2602, "eval_samples_per_second": 317.23, "eval_steps_per_second": 10.176, "eval_wer": 0.0985985985985986, "step": 5100 }, { "epoch": 97.17, "learning_rate": 7.097015807511542e-08, "loss": 1.5609, "step": 5150 }, { "epoch": 97.17, "eval_loss": 2.1653175354003906, "eval_runtime": 2.2916, "eval_samples_per_second": 312.882, "eval_steps_per_second": 10.037, "eval_wer": 0.09818151484818151, "step": 5150 }, { "epoch": 98.11, "learning_rate": 3.71968496571018e-08, "loss": 1.6414, "step": 5200 }, { "epoch": 98.11, "eval_loss": 2.1648495197296143, "eval_runtime": 2.2488, "eval_samples_per_second": 318.838, "eval_steps_per_second": 10.228, "eval_wer": 0.09826493159826494, "step": 5200 }, { "epoch": 99.06, "learning_rate": 1.4579919805198795e-08, "loss": 1.581, "step": 5250 }, { "epoch": 99.06, "eval_loss": 2.166619300842285, "eval_runtime": 2.3312, "eval_samples_per_second": 307.572, "eval_steps_per_second": 9.866, "eval_wer": 0.09868201534868201, "step": 5250 }, { "epoch": 100.0, "learning_rate": 2.2208379630039858e-09, "loss": 1.6469, "step": 5300 }, { "epoch": 100.0, "eval_loss": 2.1651339530944824, "eval_runtime": 2.2476, "eval_samples_per_second": 319.01, "eval_steps_per_second": 10.233, "eval_wer": 0.09851518184851518, "step": 5300 }, { "epoch": 100.0, "step": 5300, "total_flos": 8.039557232590848e+16, "train_loss": 30.018039647228314, "train_runtime": 4054.7018, "train_samples_per_second": 83.459, "train_steps_per_second": 1.307 } ], "logging_steps": 50, "max_steps": 5300, "num_train_epochs": 100, "save_steps": 50, "total_flos": 8.039557232590848e+16, "trial_name": null, "trial_params": null }