|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.677419354838708, |
|
"global_step": 9200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 7.3425, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 3.277569532394409, |
|
"eval_runtime": 138.4344, |
|
"eval_samples_per_second": 10.994, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.0002897727272727273, |
|
"loss": 2.6649, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 1.1241918802261353, |
|
"eval_runtime": 115.4749, |
|
"eval_samples_per_second": 13.18, |
|
"eval_wer": 0.8526294539214697, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 0.0002761363636363636, |
|
"loss": 0.7589, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 0.69745272397995, |
|
"eval_runtime": 137.7134, |
|
"eval_samples_per_second": 11.052, |
|
"eval_wer": 0.6960734833955788, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 0.0002625, |
|
"loss": 0.476, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"eval_loss": 0.6482455730438232, |
|
"eval_runtime": 115.5054, |
|
"eval_samples_per_second": 13.177, |
|
"eval_wer": 0.6553951751286968, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 0.00024886363636363637, |
|
"loss": 0.3479, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"eval_loss": 0.6039410829544067, |
|
"eval_runtime": 115.7857, |
|
"eval_samples_per_second": 13.145, |
|
"eval_wer": 0.5894821843141214, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 0.0002352272727272727, |
|
"loss": 0.2783, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"eval_loss": 0.6183858513832092, |
|
"eval_runtime": 115.9363, |
|
"eval_samples_per_second": 13.128, |
|
"eval_wer": 0.5855455738366812, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 0.00022159090909090908, |
|
"loss": 0.2204, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_loss": 0.5993764996528625, |
|
"eval_runtime": 137.9021, |
|
"eval_samples_per_second": 11.037, |
|
"eval_wer": 0.550721711920864, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 0.00020795454545454546, |
|
"loss": 0.1881, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"eval_loss": 0.5736687779426575, |
|
"eval_runtime": 138.6384, |
|
"eval_samples_per_second": 10.978, |
|
"eval_wer": 0.537801554456445, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 0.00019431818181818179, |
|
"loss": 0.1639, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"eval_loss": 0.5788838267326355, |
|
"eval_runtime": 139.0007, |
|
"eval_samples_per_second": 10.95, |
|
"eval_wer": 0.5217522963561119, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"learning_rate": 0.00018068181818181817, |
|
"loss": 0.1464, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 12.9, |
|
"eval_loss": 0.5988554358482361, |
|
"eval_runtime": 116.0695, |
|
"eval_samples_per_second": 13.113, |
|
"eval_wer": 0.5138790754012315, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 0.00016704545454545452, |
|
"loss": 0.1222, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"eval_loss": 0.6150318384170532, |
|
"eval_runtime": 140.7397, |
|
"eval_samples_per_second": 10.814, |
|
"eval_wer": 0.5012617341273847, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"learning_rate": 0.0001534090909090909, |
|
"loss": 0.112, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 15.48, |
|
"eval_loss": 0.6471191644668579, |
|
"eval_runtime": 138.3313, |
|
"eval_samples_per_second": 11.003, |
|
"eval_wer": 0.5148884627031391, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"learning_rate": 0.00013977272727272726, |
|
"loss": 0.105, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 16.77, |
|
"eval_loss": 0.6167843341827393, |
|
"eval_runtime": 116.3058, |
|
"eval_samples_per_second": 13.086, |
|
"eval_wer": 0.49237912587059657, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"learning_rate": 0.00012613636363636364, |
|
"loss": 0.0936, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 18.06, |
|
"eval_loss": 0.6400735378265381, |
|
"eval_runtime": 138.5841, |
|
"eval_samples_per_second": 10.983, |
|
"eval_wer": 0.4856162309478147, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 0.0001125, |
|
"loss": 0.0849, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"eval_loss": 0.6149299740791321, |
|
"eval_runtime": 116.9081, |
|
"eval_samples_per_second": 13.019, |
|
"eval_wer": 0.47875239729484204, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"learning_rate": 9.886363636363635e-05, |
|
"loss": 0.079, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 20.65, |
|
"eval_loss": 0.6410804986953735, |
|
"eval_runtime": 139.105, |
|
"eval_samples_per_second": 10.941, |
|
"eval_wer": 0.48067023316846674, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"learning_rate": 8.522727272727273e-05, |
|
"loss": 0.0752, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 21.94, |
|
"eval_loss": 0.6123934388160706, |
|
"eval_runtime": 138.8056, |
|
"eval_samples_per_second": 10.965, |
|
"eval_wer": 0.4773392550721712, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"learning_rate": 7.159090909090909e-05, |
|
"loss": 0.0688, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 23.23, |
|
"eval_loss": 0.6324551105499268, |
|
"eval_runtime": 137.9116, |
|
"eval_samples_per_second": 11.036, |
|
"eval_wer": 0.47198950237206017, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"learning_rate": 5.795454545454545e-05, |
|
"loss": 0.0659, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 24.52, |
|
"eval_loss": 0.6281149387359619, |
|
"eval_runtime": 139.9793, |
|
"eval_samples_per_second": 10.873, |
|
"eval_wer": 0.464923791258706, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"learning_rate": 4.431818181818182e-05, |
|
"loss": 0.0582, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 25.81, |
|
"eval_loss": 0.6326279640197754, |
|
"eval_runtime": 139.8038, |
|
"eval_samples_per_second": 10.887, |
|
"eval_wer": 0.4604824871303119, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"learning_rate": 3.068181818181818e-05, |
|
"loss": 0.0551, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 27.1, |
|
"eval_loss": 0.6271815299987793, |
|
"eval_runtime": 139.3604, |
|
"eval_samples_per_second": 10.921, |
|
"eval_wer": 0.46300595538508127, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"learning_rate": 1.7045454545454543e-05, |
|
"loss": 0.0508, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"eval_loss": 0.6332981586456299, |
|
"eval_runtime": 139.0366, |
|
"eval_samples_per_second": 10.947, |
|
"eval_wer": 0.4594730998284042, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"learning_rate": 3.4090909090909087e-06, |
|
"loss": 0.0517, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 29.68, |
|
"eval_loss": 0.6157954931259155, |
|
"eval_runtime": 140.2207, |
|
"eval_samples_per_second": 10.854, |
|
"eval_wer": 0.45119612395276065, |
|
"step": 9200 |
|
} |
|
], |
|
"max_steps": 9300, |
|
"num_train_epochs": 30, |
|
"total_flos": 1.4085584505165812e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|