{ "best_metric": null, "best_model_checkpoint": null, "epoch": 29.677419354838708, "global_step": 9200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.29, "learning_rate": 0.00023999999999999998, "loss": 7.3425, "step": 400 }, { "epoch": 1.29, "eval_loss": 3.277569532394409, "eval_runtime": 138.4344, "eval_samples_per_second": 10.994, "eval_wer": 1.0, "step": 400 }, { "epoch": 2.58, "learning_rate": 0.0002897727272727273, "loss": 2.6649, "step": 800 }, { "epoch": 2.58, "eval_loss": 1.1241918802261353, "eval_runtime": 115.4749, "eval_samples_per_second": 13.18, "eval_wer": 0.8526294539214697, "step": 800 }, { "epoch": 3.87, "learning_rate": 0.0002761363636363636, "loss": 0.7589, "step": 1200 }, { "epoch": 3.87, "eval_loss": 0.69745272397995, "eval_runtime": 137.7134, "eval_samples_per_second": 11.052, "eval_wer": 0.6960734833955788, "step": 1200 }, { "epoch": 5.16, "learning_rate": 0.0002625, "loss": 0.476, "step": 1600 }, { "epoch": 5.16, "eval_loss": 0.6482455730438232, "eval_runtime": 115.5054, "eval_samples_per_second": 13.177, "eval_wer": 0.6553951751286968, "step": 1600 }, { "epoch": 6.45, "learning_rate": 0.00024886363636363637, "loss": 0.3479, "step": 2000 }, { "epoch": 6.45, "eval_loss": 0.6039410829544067, "eval_runtime": 115.7857, "eval_samples_per_second": 13.145, "eval_wer": 0.5894821843141214, "step": 2000 }, { "epoch": 7.74, "learning_rate": 0.0002352272727272727, "loss": 0.2783, "step": 2400 }, { "epoch": 7.74, "eval_loss": 0.6183858513832092, "eval_runtime": 115.9363, "eval_samples_per_second": 13.128, "eval_wer": 0.5855455738366812, "step": 2400 }, { "epoch": 9.03, "learning_rate": 0.00022159090909090908, "loss": 0.2204, "step": 2800 }, { "epoch": 9.03, "eval_loss": 0.5993764996528625, "eval_runtime": 137.9021, "eval_samples_per_second": 11.037, "eval_wer": 0.550721711920864, "step": 2800 }, { "epoch": 10.32, "learning_rate": 0.00020795454545454546, "loss": 0.1881, "step": 3200 }, { "epoch": 10.32, "eval_loss": 0.5736687779426575, "eval_runtime": 138.6384, "eval_samples_per_second": 10.978, "eval_wer": 0.537801554456445, "step": 3200 }, { "epoch": 11.61, "learning_rate": 0.00019431818181818179, "loss": 0.1639, "step": 3600 }, { "epoch": 11.61, "eval_loss": 0.5788838267326355, "eval_runtime": 139.0007, "eval_samples_per_second": 10.95, "eval_wer": 0.5217522963561119, "step": 3600 }, { "epoch": 12.9, "learning_rate": 0.00018068181818181817, "loss": 0.1464, "step": 4000 }, { "epoch": 12.9, "eval_loss": 0.5988554358482361, "eval_runtime": 116.0695, "eval_samples_per_second": 13.113, "eval_wer": 0.5138790754012315, "step": 4000 }, { "epoch": 14.19, "learning_rate": 0.00016704545454545452, "loss": 0.1222, "step": 4400 }, { "epoch": 14.19, "eval_loss": 0.6150318384170532, "eval_runtime": 140.7397, "eval_samples_per_second": 10.814, "eval_wer": 0.5012617341273847, "step": 4400 }, { "epoch": 15.48, "learning_rate": 0.0001534090909090909, "loss": 0.112, "step": 4800 }, { "epoch": 15.48, "eval_loss": 0.6471191644668579, "eval_runtime": 138.3313, "eval_samples_per_second": 11.003, "eval_wer": 0.5148884627031391, "step": 4800 }, { "epoch": 16.77, "learning_rate": 0.00013977272727272726, "loss": 0.105, "step": 5200 }, { "epoch": 16.77, "eval_loss": 0.6167843341827393, "eval_runtime": 116.3058, "eval_samples_per_second": 13.086, "eval_wer": 0.49237912587059657, "step": 5200 }, { "epoch": 18.06, "learning_rate": 0.00012613636363636364, "loss": 0.0936, "step": 5600 }, { "epoch": 18.06, "eval_loss": 0.6400735378265381, "eval_runtime": 138.5841, "eval_samples_per_second": 10.983, "eval_wer": 0.4856162309478147, "step": 5600 }, { "epoch": 19.35, "learning_rate": 0.0001125, "loss": 0.0849, "step": 6000 }, { "epoch": 19.35, "eval_loss": 0.6149299740791321, "eval_runtime": 116.9081, "eval_samples_per_second": 13.019, "eval_wer": 0.47875239729484204, "step": 6000 }, { "epoch": 20.65, "learning_rate": 9.886363636363635e-05, "loss": 0.079, "step": 6400 }, { "epoch": 20.65, "eval_loss": 0.6410804986953735, "eval_runtime": 139.105, "eval_samples_per_second": 10.941, "eval_wer": 0.48067023316846674, "step": 6400 }, { "epoch": 21.94, "learning_rate": 8.522727272727273e-05, "loss": 0.0752, "step": 6800 }, { "epoch": 21.94, "eval_loss": 0.6123934388160706, "eval_runtime": 138.8056, "eval_samples_per_second": 10.965, "eval_wer": 0.4773392550721712, "step": 6800 }, { "epoch": 23.23, "learning_rate": 7.159090909090909e-05, "loss": 0.0688, "step": 7200 }, { "epoch": 23.23, "eval_loss": 0.6324551105499268, "eval_runtime": 137.9116, "eval_samples_per_second": 11.036, "eval_wer": 0.47198950237206017, "step": 7200 }, { "epoch": 24.52, "learning_rate": 5.795454545454545e-05, "loss": 0.0659, "step": 7600 }, { "epoch": 24.52, "eval_loss": 0.6281149387359619, "eval_runtime": 139.9793, "eval_samples_per_second": 10.873, "eval_wer": 0.464923791258706, "step": 7600 }, { "epoch": 25.81, "learning_rate": 4.431818181818182e-05, "loss": 0.0582, "step": 8000 }, { "epoch": 25.81, "eval_loss": 0.6326279640197754, "eval_runtime": 139.8038, "eval_samples_per_second": 10.887, "eval_wer": 0.4604824871303119, "step": 8000 }, { "epoch": 27.1, "learning_rate": 3.068181818181818e-05, "loss": 0.0551, "step": 8400 }, { "epoch": 27.1, "eval_loss": 0.6271815299987793, "eval_runtime": 139.3604, "eval_samples_per_second": 10.921, "eval_wer": 0.46300595538508127, "step": 8400 }, { "epoch": 28.39, "learning_rate": 1.7045454545454543e-05, "loss": 0.0508, "step": 8800 }, { "epoch": 28.39, "eval_loss": 0.6332981586456299, "eval_runtime": 139.0366, "eval_samples_per_second": 10.947, "eval_wer": 0.4594730998284042, "step": 8800 }, { "epoch": 29.68, "learning_rate": 3.4090909090909087e-06, "loss": 0.0517, "step": 9200 }, { "epoch": 29.68, "eval_loss": 0.6157954931259155, "eval_runtime": 140.2207, "eval_samples_per_second": 10.854, "eval_wer": 0.45119612395276065, "step": 9200 } ], "max_steps": 9300, "num_train_epochs": 30, "total_flos": 1.4085584505165812e+19, "trial_name": null, "trial_params": null }