{ "best_metric": null, "best_model_checkpoint": null, "epoch": 99.9584, "global_step": 46800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.07, "learning_rate": 2.9880000000000002e-05, "loss": 7.2457, "step": 500 }, { "epoch": 2.13, "learning_rate": 2.967732181425486e-05, "loss": 2.3129, "step": 1000 }, { "epoch": 2.13, "eval_cer": 0.05796002383402849, "eval_loss": 0.5042470097541809, "eval_runtime": 80.7316, "eval_samples_per_second": 18.58, "eval_steps_per_second": 2.329, "eval_wer": 0.270302835569122, "step": 1000 }, { "epoch": 3.2, "learning_rate": 2.9353347732181423e-05, "loss": 0.389, "step": 1500 }, { "epoch": 4.27, "learning_rate": 2.9029373650107992e-05, "loss": 0.2251, "step": 2000 }, { "epoch": 4.27, "eval_cer": 0.02945398407453551, "eval_loss": 0.17824731767177582, "eval_runtime": 80.3146, "eval_samples_per_second": 18.677, "eval_steps_per_second": 2.341, "eval_wer": 0.11984898385412483, "step": 2000 }, { "epoch": 5.33, "learning_rate": 2.870539956803456e-05, "loss": 0.1727, "step": 2500 }, { "epoch": 6.4, "learning_rate": 2.8381425485961122e-05, "loss": 0.1462, "step": 3000 }, { "epoch": 6.4, "eval_cer": 0.02651535669790369, "eval_loss": 0.1635013222694397, "eval_runtime": 72.2551, "eval_samples_per_second": 20.76, "eval_steps_per_second": 2.602, "eval_wer": 0.10185557072857257, "step": 3000 }, { "epoch": 7.47, "learning_rate": 2.805745140388769e-05, "loss": 0.128, "step": 3500 }, { "epoch": 8.53, "learning_rate": 2.7733477321814256e-05, "loss": 0.1162, "step": 4000 }, { "epoch": 8.53, "eval_cer": 0.02478197280754022, "eval_loss": 0.1619153469800949, "eval_runtime": 81.2175, "eval_samples_per_second": 18.469, "eval_steps_per_second": 2.315, "eval_wer": 0.09309984737729939, "step": 4000 }, { "epoch": 9.6, "learning_rate": 2.740950323974082e-05, "loss": 0.1095, "step": 4500 }, { "epoch": 10.67, "learning_rate": 2.7085529157667386e-05, "loss": 0.0988, "step": 5000 }, { "epoch": 10.67, "eval_cer": 0.024890309300687936, "eval_loss": 0.1653970628976822, "eval_runtime": 72.7324, "eval_samples_per_second": 20.624, "eval_steps_per_second": 2.585, "eval_wer": 0.09398345248614347, "step": 5000 }, { "epoch": 11.73, "learning_rate": 2.676155507559395e-05, "loss": 0.0949, "step": 5500 }, { "epoch": 12.8, "learning_rate": 2.643758099352052e-05, "loss": 0.0904, "step": 6000 }, { "epoch": 12.8, "eval_cer": 0.024186122095227778, "eval_loss": 0.17023487389087677, "eval_runtime": 80.5666, "eval_samples_per_second": 18.618, "eval_steps_per_second": 2.333, "eval_wer": 0.08450477950036148, "step": 6000 }, { "epoch": 13.87, "learning_rate": 2.6113606911447086e-05, "loss": 0.0859, "step": 6500 }, { "epoch": 14.93, "learning_rate": 2.578963282937365e-05, "loss": 0.0813, "step": 7000 }, { "epoch": 14.93, "eval_cer": 0.02392882292400195, "eval_loss": 0.16578005254268646, "eval_runtime": 72.3723, "eval_samples_per_second": 20.726, "eval_steps_per_second": 2.598, "eval_wer": 0.08458510723752911, "step": 7000 }, { "epoch": 16.02, "learning_rate": 2.5466306695464364e-05, "loss": 0.0763, "step": 7500 }, { "epoch": 17.09, "learning_rate": 2.514233261339093e-05, "loss": 0.074, "step": 8000 }, { "epoch": 17.09, "eval_cer": 0.02395590704728888, "eval_loss": 0.1762518584728241, "eval_runtime": 72.6367, "eval_samples_per_second": 20.651, "eval_steps_per_second": 2.588, "eval_wer": 0.07928347658446462, "step": 8000 }, { "epoch": 18.16, "learning_rate": 2.4818358531317497e-05, "loss": 0.0714, "step": 8500 }, { "epoch": 19.22, "learning_rate": 2.4495032397408207e-05, "loss": 0.0692, "step": 9000 }, { "epoch": 19.22, "eval_cer": 0.0242538324034451, "eval_loss": 0.17680838704109192, "eval_runtime": 68.8274, "eval_samples_per_second": 21.794, "eval_steps_per_second": 2.731, "eval_wer": 0.0834605189171821, "step": 9000 }, { "epoch": 20.3, "learning_rate": 2.4171058315334776e-05, "loss": 0.0658, "step": 9500 }, { "epoch": 21.36, "learning_rate": 2.3847084233261337e-05, "loss": 0.0652, "step": 10000 }, { "epoch": 21.36, "eval_cer": 0.02365798169113266, "eval_loss": 0.18117107450962067, "eval_runtime": 72.0293, "eval_samples_per_second": 20.825, "eval_steps_per_second": 2.61, "eval_wer": 0.07968511527030284, "step": 10000 }, { "epoch": 22.43, "learning_rate": 2.3523110151187906e-05, "loss": 0.0625, "step": 10500 }, { "epoch": 23.5, "learning_rate": 2.319913606911447e-05, "loss": 0.0593, "step": 11000 }, { "epoch": 23.5, "eval_cer": 0.022127728725421156, "eval_loss": 0.18102437257766724, "eval_runtime": 69.723, "eval_samples_per_second": 21.514, "eval_steps_per_second": 2.696, "eval_wer": 0.07502610651457949, "step": 11000 }, { "epoch": 24.56, "learning_rate": 2.2875809935205184e-05, "loss": 0.0604, "step": 11500 }, { "epoch": 25.63, "learning_rate": 2.255183585313175e-05, "loss": 0.0547, "step": 12000 }, { "epoch": 25.63, "eval_cer": 0.023265261903472185, "eval_loss": 0.18346643447875977, "eval_runtime": 70.9539, "eval_samples_per_second": 21.14, "eval_steps_per_second": 2.65, "eval_wer": 0.07944413205879991, "step": 12000 }, { "epoch": 26.7, "learning_rate": 2.2227861771058315e-05, "loss": 0.0565, "step": 12500 }, { "epoch": 27.76, "learning_rate": 2.1903887688984883e-05, "loss": 0.0514, "step": 13000 }, { "epoch": 27.76, "eval_cer": 0.022385027896646984, "eval_loss": 0.18281148374080658, "eval_runtime": 70.1644, "eval_samples_per_second": 21.378, "eval_steps_per_second": 2.679, "eval_wer": 0.07607036709775886, "step": 13000 }, { "epoch": 28.83, "learning_rate": 2.157991360691145e-05, "loss": 0.053, "step": 13500 }, { "epoch": 29.9, "learning_rate": 2.1255939524838014e-05, "loss": 0.0488, "step": 14000 }, { "epoch": 29.9, "eval_cer": 0.02242565408157738, "eval_loss": 0.18435174226760864, "eval_runtime": 69.6075, "eval_samples_per_second": 21.549, "eval_steps_per_second": 2.701, "eval_wer": 0.07663266125793236, "step": 14000 }, { "epoch": 30.96, "learning_rate": 2.093196544276458e-05, "loss": 0.0514, "step": 14500 }, { "epoch": 32.03, "learning_rate": 2.0607991360691144e-05, "loss": 0.0478, "step": 15000 }, { "epoch": 32.03, "eval_cer": 0.022588158821298953, "eval_loss": 0.1909506469964981, "eval_runtime": 72.4928, "eval_samples_per_second": 20.692, "eval_steps_per_second": 2.593, "eval_wer": 0.0768736444694353, "step": 15000 }, { "epoch": 33.1, "learning_rate": 2.0284017278617713e-05, "loss": 0.0489, "step": 15500 }, { "epoch": 34.16, "learning_rate": 1.9960043196544274e-05, "loss": 0.0459, "step": 16000 }, { "epoch": 34.16, "eval_cer": 0.02386111261578463, "eval_loss": 0.19651177525520325, "eval_runtime": 69.572, "eval_samples_per_second": 21.56, "eval_steps_per_second": 2.702, "eval_wer": 0.08313920796851153, "step": 16000 }, { "epoch": 35.23, "learning_rate": 1.9636069114470843e-05, "loss": 0.0481, "step": 16500 }, { "epoch": 36.3, "learning_rate": 1.9312095032397408e-05, "loss": 0.0429, "step": 17000 }, { "epoch": 36.3, "eval_cer": 0.021951681924056117, "eval_loss": 0.20002886652946472, "eval_runtime": 69.2113, "eval_samples_per_second": 21.673, "eval_steps_per_second": 2.716, "eval_wer": 0.07599003936059122, "step": 17000 }, { "epoch": 37.36, "learning_rate": 1.898876889848812e-05, "loss": 0.0461, "step": 17500 }, { "epoch": 38.43, "learning_rate": 1.8664794816414686e-05, "loss": 0.0443, "step": 18000 }, { "epoch": 38.43, "eval_cer": 0.02277774768430746, "eval_loss": 0.20385122299194336, "eval_runtime": 72.6401, "eval_samples_per_second": 20.65, "eval_steps_per_second": 2.588, "eval_wer": 0.0774359386296088, "step": 18000 }, { "epoch": 39.5, "learning_rate": 1.834082073434125e-05, "loss": 0.0411, "step": 18500 }, { "epoch": 40.56, "learning_rate": 1.8017494600431965e-05, "loss": 0.0398, "step": 19000 }, { "epoch": 40.56, "eval_cer": 0.021856887492551866, "eval_loss": 0.19806204736232758, "eval_runtime": 72.4106, "eval_samples_per_second": 20.715, "eval_steps_per_second": 2.596, "eval_wer": 0.07550807293758535, "step": 19000 }, { "epoch": 41.63, "learning_rate": 1.7693520518358533e-05, "loss": 0.0431, "step": 19500 }, { "epoch": 42.7, "learning_rate": 1.7369546436285095e-05, "loss": 0.0408, "step": 20000 }, { "epoch": 42.7, "eval_cer": 0.02387465467742809, "eval_loss": 0.2053176462650299, "eval_runtime": 69.6266, "eval_samples_per_second": 21.543, "eval_steps_per_second": 2.7, "eval_wer": 0.0775965941039441, "step": 20000 }, { "epoch": 43.76, "learning_rate": 1.7045572354211664e-05, "loss": 0.0399, "step": 20500 }, { "epoch": 44.83, "learning_rate": 1.6721598272138232e-05, "loss": 0.0406, "step": 21000 }, { "epoch": 44.83, "eval_cer": 0.022060018417203835, "eval_loss": 0.2049773633480072, "eval_runtime": 70.2646, "eval_samples_per_second": 21.348, "eval_steps_per_second": 2.676, "eval_wer": 0.07398184593140011, "step": 21000 }, { "epoch": 45.9, "learning_rate": 1.6397624190064794e-05, "loss": 0.0381, "step": 21500 }, { "epoch": 46.96, "learning_rate": 1.6073650107991363e-05, "loss": 0.0383, "step": 22000 }, { "epoch": 46.96, "eval_cer": 0.022357943773360055, "eval_loss": 0.21280354261398315, "eval_runtime": 72.639, "eval_samples_per_second": 20.65, "eval_steps_per_second": 2.588, "eval_wer": 0.07325889629689132, "step": 22000 }, { "epoch": 48.03, "learning_rate": 1.5749676025917928e-05, "loss": 0.0365, "step": 22500 }, { "epoch": 49.1, "learning_rate": 1.5425701943844493e-05, "loss": 0.0379, "step": 23000 }, { "epoch": 49.1, "eval_cer": 0.021978766047343046, "eval_loss": 0.21096549928188324, "eval_runtime": 73.1028, "eval_samples_per_second": 20.519, "eval_steps_per_second": 2.572, "eval_wer": 0.07309824082255603, "step": 23000 }, { "epoch": 50.16, "learning_rate": 1.5102375809935208e-05, "loss": 0.0361, "step": 23500 }, { "epoch": 51.23, "learning_rate": 1.4778401727861771e-05, "loss": 0.0369, "step": 24000 }, { "epoch": 51.23, "eval_cer": 0.021951681924056117, "eval_loss": 0.2144922912120819, "eval_runtime": 69.8484, "eval_samples_per_second": 21.475, "eval_steps_per_second": 2.692, "eval_wer": 0.07446381235440598, "step": 24000 }, { "epoch": 52.3, "learning_rate": 1.4455075593952484e-05, "loss": 0.0334, "step": 24500 }, { "epoch": 53.36, "learning_rate": 1.4131101511879051e-05, "loss": 0.0341, "step": 25000 }, { "epoch": 53.36, "eval_cer": 0.022208981095281945, "eval_loss": 0.2145662158727646, "eval_runtime": 69.6873, "eval_samples_per_second": 21.525, "eval_steps_per_second": 2.698, "eval_wer": 0.07245561892521488, "step": 25000 }, { "epoch": 54.43, "learning_rate": 1.3807127429805616e-05, "loss": 0.0356, "step": 25500 }, { "epoch": 55.5, "learning_rate": 1.3483801295896327e-05, "loss": 0.0322, "step": 26000 }, { "epoch": 55.5, "eval_cer": 0.021626672444612968, "eval_loss": 0.21297892928123474, "eval_runtime": 73.9282, "eval_samples_per_second": 20.29, "eval_steps_per_second": 2.543, "eval_wer": 0.07100971965619729, "step": 26000 }, { "epoch": 56.56, "learning_rate": 1.3159827213822896e-05, "loss": 0.0327, "step": 26500 }, { "epoch": 57.63, "learning_rate": 1.2835853131749461e-05, "loss": 0.0316, "step": 27000 }, { "epoch": 57.63, "eval_cer": 0.02224960728021234, "eval_loss": 0.2133886069059372, "eval_runtime": 69.9039, "eval_samples_per_second": 21.458, "eval_steps_per_second": 2.689, "eval_wer": 0.0715720138163708, "step": 27000 }, { "epoch": 58.7, "learning_rate": 1.2511879049676026e-05, "loss": 0.0319, "step": 27500 }, { "epoch": 59.76, "learning_rate": 1.218855291576674e-05, "loss": 0.0324, "step": 28000 }, { "epoch": 59.76, "eval_cer": 0.022236065218568874, "eval_loss": 0.21722277998924255, "eval_runtime": 69.753, "eval_samples_per_second": 21.504, "eval_steps_per_second": 2.695, "eval_wer": 0.07309824082255603, "step": 28000 }, { "epoch": 60.83, "learning_rate": 1.1864578833693305e-05, "loss": 0.0296, "step": 28500 }, { "epoch": 61.9, "learning_rate": 1.1540604751619871e-05, "loss": 0.0315, "step": 29000 }, { "epoch": 61.9, "eval_cer": 0.02280483180759439, "eval_loss": 0.22069784998893738, "eval_runtime": 79.4911, "eval_samples_per_second": 18.87, "eval_steps_per_second": 2.365, "eval_wer": 0.07454414009157362, "step": 29000 }, { "epoch": 62.96, "learning_rate": 1.1216630669546437e-05, "loss": 0.0315, "step": 29500 }, { "epoch": 64.03, "learning_rate": 1.0892656587473002e-05, "loss": 0.0294, "step": 30000 }, { "epoch": 64.03, "eval_cer": 0.021843345430908403, "eval_loss": 0.2183454930782318, "eval_runtime": 77.818, "eval_samples_per_second": 19.276, "eval_steps_per_second": 2.416, "eval_wer": 0.07165234155353843, "step": 30000 }, { "epoch": 65.1, "learning_rate": 1.0569330453563715e-05, "loss": 0.0301, "step": 30500 }, { "epoch": 66.16, "learning_rate": 1.024535637149028e-05, "loss": 0.028, "step": 31000 }, { "epoch": 66.16, "eval_cer": 0.021355831211743677, "eval_loss": 0.2184668779373169, "eval_runtime": 80.0286, "eval_samples_per_second": 18.743, "eval_steps_per_second": 2.349, "eval_wer": 0.06956382038717969, "step": 31000 }, { "epoch": 67.23, "learning_rate": 9.921382289416847e-06, "loss": 0.0283, "step": 31500 }, { "epoch": 68.3, "learning_rate": 9.597408207343414e-06, "loss": 0.0263, "step": 32000 }, { "epoch": 68.3, "eval_cer": 0.021504793889821787, "eval_loss": 0.21668484807014465, "eval_runtime": 70.5125, "eval_samples_per_second": 21.273, "eval_steps_per_second": 2.666, "eval_wer": 0.06964414812434734, "step": 32000 }, { "epoch": 69.36, "learning_rate": 9.273434125269979e-06, "loss": 0.0286, "step": 32500 }, { "epoch": 70.43, "learning_rate": 8.949460043196544e-06, "loss": 0.0299, "step": 33000 }, { "epoch": 70.43, "eval_cer": 0.021707924814473756, "eval_loss": 0.22012607753276825, "eval_runtime": 72.8041, "eval_samples_per_second": 20.603, "eval_steps_per_second": 2.582, "eval_wer": 0.07092939191902964, "step": 33000 }, { "epoch": 71.5, "learning_rate": 8.62548596112311e-06, "loss": 0.0267, "step": 33500 }, { "epoch": 72.56, "learning_rate": 8.301511879049676e-06, "loss": 0.0273, "step": 34000 }, { "epoch": 72.56, "eval_cer": 0.02219543903363848, "eval_loss": 0.21641910076141357, "eval_runtime": 72.2901, "eval_samples_per_second": 20.75, "eval_steps_per_second": 2.601, "eval_wer": 0.07237529118804724, "step": 34000 }, { "epoch": 73.63, "learning_rate": 7.977537796976242e-06, "loss": 0.0267, "step": 34500 }, { "epoch": 74.7, "learning_rate": 7.653563714902809e-06, "loss": 0.0269, "step": 35000 }, { "epoch": 74.7, "eval_cer": 0.021951681924056117, "eval_loss": 0.22398144006729126, "eval_runtime": 69.5423, "eval_samples_per_second": 21.57, "eval_steps_per_second": 2.703, "eval_wer": 0.06932283717567676, "step": 35000 }, { "epoch": 75.85, "learning_rate": 7.329589632829374e-06, "loss": 0.0265, "step": 35500 }, { "epoch": 76.92, "learning_rate": 7.00561555075594e-06, "loss": 0.0264, "step": 36000 }, { "epoch": 76.92, "eval_cer": 0.021789177184334544, "eval_loss": 0.22204121947288513, "eval_runtime": 71.976, "eval_samples_per_second": 20.84, "eval_steps_per_second": 2.612, "eval_wer": 0.07036709775885613, "step": 36000 }, { "epoch": 77.99, "learning_rate": 6.682289416846653e-06, "loss": 0.0257, "step": 36500 }, { "epoch": 79.05, "learning_rate": 6.358963282937365e-06, "loss": 0.0257, "step": 37000 }, { "epoch": 79.05, "eval_cer": 0.02172146687611722, "eval_loss": 0.22285164892673492, "eval_runtime": 68.7898, "eval_samples_per_second": 21.806, "eval_steps_per_second": 2.733, "eval_wer": 0.0688408707526709, "step": 37000 }, { "epoch": 80.12, "learning_rate": 6.034989200863931e-06, "loss": 0.0257, "step": 37500 }, { "epoch": 81.19, "learning_rate": 5.711015118790497e-06, "loss": 0.0251, "step": 38000 }, { "epoch": 81.19, "eval_cer": 0.02154542007475218, "eval_loss": 0.22632832825183868, "eval_runtime": 71.6223, "eval_samples_per_second": 20.943, "eval_steps_per_second": 2.625, "eval_wer": 0.0694031649128444, "step": 38000 }, { "epoch": 82.25, "learning_rate": 5.387041036717062e-06, "loss": 0.0235, "step": 38500 }, { "epoch": 83.32, "learning_rate": 5.063066954643629e-06, "loss": 0.0245, "step": 39000 }, { "epoch": 83.32, "eval_cer": 0.02101727967065706, "eval_loss": 0.22526498138904572, "eval_runtime": 70.7229, "eval_samples_per_second": 21.21, "eval_steps_per_second": 2.658, "eval_wer": 0.06731464374648566, "step": 39000 }, { "epoch": 84.39, "learning_rate": 4.739740820734342e-06, "loss": 0.0252, "step": 39500 }, { "epoch": 85.45, "learning_rate": 4.415766738660907e-06, "loss": 0.0243, "step": 40000 }, { "epoch": 85.45, "eval_cer": 0.02151833595146525, "eval_loss": 0.2263830602169037, "eval_runtime": 69.4576, "eval_samples_per_second": 21.596, "eval_steps_per_second": 2.707, "eval_wer": 0.06916218170134147, "step": 40000 }, { "epoch": 86.52, "learning_rate": 4.09244060475162e-06, "loss": 0.0228, "step": 40500 }, { "epoch": 87.59, "learning_rate": 3.7684665226781857e-06, "loss": 0.0236, "step": 41000 }, { "epoch": 87.59, "eval_cer": 0.021667298629543363, "eval_loss": 0.22610026597976685, "eval_runtime": 69.0446, "eval_samples_per_second": 21.725, "eval_steps_per_second": 2.723, "eval_wer": 0.06892119848983855, "step": 41000 }, { "epoch": 88.65, "learning_rate": 3.4444924406047518e-06, "loss": 0.0244, "step": 41500 }, { "epoch": 89.72, "learning_rate": 3.1205183585313174e-06, "loss": 0.0225, "step": 42000 }, { "epoch": 89.72, "eval_cer": 0.021179784410378637, "eval_loss": 0.22654421627521515, "eval_runtime": 72.2716, "eval_samples_per_second": 20.755, "eval_steps_per_second": 2.601, "eval_wer": 0.06803759338099445, "step": 42000 }, { "epoch": 90.79, "learning_rate": 2.7965442764578835e-06, "loss": 0.0216, "step": 42500 }, { "epoch": 91.85, "learning_rate": 2.472570194384449e-06, "loss": 0.023, "step": 43000 }, { "epoch": 91.85, "eval_cer": 0.021030821732300524, "eval_loss": 0.22652284801006317, "eval_runtime": 72.2083, "eval_samples_per_second": 20.773, "eval_steps_per_second": 2.604, "eval_wer": 0.06739497148365331, "step": 43000 }, { "epoch": 92.92, "learning_rate": 2.149244060475162e-06, "loss": 0.0228, "step": 43500 }, { "epoch": 93.99, "learning_rate": 1.825917926565875e-06, "loss": 0.0217, "step": 44000 }, { "epoch": 93.99, "eval_cer": 0.020936027300796273, "eval_loss": 0.22653113305568695, "eval_runtime": 69.3882, "eval_samples_per_second": 21.618, "eval_steps_per_second": 2.709, "eval_wer": 0.06771628243232389, "step": 44000 }, { "epoch": 95.05, "learning_rate": 1.5019438444924408e-06, "loss": 0.0203, "step": 44500 }, { "epoch": 96.12, "learning_rate": 1.1779697624190064e-06, "loss": 0.022, "step": 45000 }, { "epoch": 96.12, "eval_cer": 0.021057905855587453, "eval_loss": 0.22544603049755096, "eval_runtime": 72.3315, "eval_samples_per_second": 20.738, "eval_steps_per_second": 2.599, "eval_wer": 0.06851955980400032, "step": 45000 }, { "epoch": 97.19, "learning_rate": 8.539956803455724e-07, "loss": 0.0215, "step": 45500 }, { "epoch": 98.25, "learning_rate": 5.300215982721382e-07, "loss": 0.0219, "step": 46000 }, { "epoch": 98.25, "eval_cer": 0.020759980499431233, "eval_loss": 0.22618666291236877, "eval_runtime": 73.3776, "eval_samples_per_second": 20.442, "eval_steps_per_second": 2.562, "eval_wer": 0.06715398827215037, "step": 46000 }, { "epoch": 99.32, "learning_rate": 2.0604751619870412e-07, "loss": 0.0204, "step": 46500 }, { "epoch": 99.96, "step": 46800, "total_flos": 2.6878408769720543e+20, "train_loss": 0.005932311532843826, "train_runtime": 23991.1474, "train_samples_per_second": 62.523, "train_steps_per_second": 1.951 } ], "max_steps": 46800, "num_train_epochs": 100, "total_flos": 2.6878408769720543e+20, "trial_name": null, "trial_params": null }