{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 40500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.85, "learning_rate": 4.9800000000000004e-05, "loss": 4.6222, "step": 1500 }, { "epoch": 1.85, "eval_loss": 5.947904586791992, "eval_runtime": 35.3873, "eval_samples_per_second": 21.505, "eval_steps_per_second": 1.356, "eval_wer": 0.5473815461346634, "step": 1500 }, { "epoch": 3.7, "learning_rate": 4.8084615384615386e-05, "loss": 1.1362, "step": 3000 }, { "epoch": 3.7, "eval_loss": 7.979872226715088, "eval_runtime": 34.3508, "eval_samples_per_second": 22.154, "eval_steps_per_second": 1.397, "eval_wer": 0.509440684004275, "step": 3000 }, { "epoch": 5.56, "learning_rate": 4.616153846153846e-05, "loss": 0.7814, "step": 4500 }, { "epoch": 5.56, "eval_loss": 5.032960891723633, "eval_runtime": 33.9113, "eval_samples_per_second": 22.441, "eval_steps_per_second": 1.415, "eval_wer": 0.47239045244032773, "step": 4500 }, { "epoch": 7.41, "learning_rate": 4.423974358974359e-05, "loss": 0.6281, "step": 6000 }, { "epoch": 7.41, "eval_loss": 2.3483684062957764, "eval_runtime": 35.8392, "eval_samples_per_second": 21.234, "eval_steps_per_second": 1.339, "eval_wer": 0.5019593872461703, "step": 6000 }, { "epoch": 9.26, "learning_rate": 4.2316666666666674e-05, "loss": 0.5472, "step": 7500 }, { "epoch": 9.26, "eval_loss": 2.249516487121582, "eval_runtime": 35.6948, "eval_samples_per_second": 21.32, "eval_steps_per_second": 1.345, "eval_wer": 0.47933737085856787, "step": 7500 }, { "epoch": 11.11, "learning_rate": 4.039358974358974e-05, "loss": 0.4827, "step": 9000 }, { "epoch": 11.11, "eval_loss": 1.1529797315597534, "eval_runtime": 35.6048, "eval_samples_per_second": 21.373, "eval_steps_per_second": 1.348, "eval_wer": 0.47684360527253294, "step": 9000 }, { "epoch": 12.96, "learning_rate": 3.847051282051282e-05, "loss": 0.4327, "step": 10500 }, { "epoch": 12.96, "eval_loss": 1.6159653663635254, "eval_runtime": 34.1129, "eval_samples_per_second": 22.308, "eval_steps_per_second": 1.407, "eval_wer": 0.4645529034556466, "step": 10500 }, { "epoch": 14.81, "learning_rate": 3.6548717948717956e-05, "loss": 0.3989, "step": 12000 }, { "epoch": 14.81, "eval_loss": 3.263315439224243, "eval_runtime": 34.7016, "eval_samples_per_second": 21.93, "eval_steps_per_second": 1.383, "eval_wer": 0.47025293908086924, "step": 12000 }, { "epoch": 16.67, "learning_rate": 3.4625641025641024e-05, "loss": 0.3522, "step": 13500 }, { "epoch": 16.67, "eval_loss": 2.2337419986724854, "eval_runtime": 35.6982, "eval_samples_per_second": 21.318, "eval_steps_per_second": 1.345, "eval_wer": 0.4707873174207339, "step": 13500 }, { "epoch": 18.52, "learning_rate": 3.2702564102564105e-05, "loss": 0.3201, "step": 15000 }, { "epoch": 18.52, "eval_loss": 3.6878626346588135, "eval_runtime": 36.4839, "eval_samples_per_second": 20.859, "eval_steps_per_second": 1.316, "eval_wer": 0.45653722835767724, "step": 15000 }, { "epoch": 20.37, "learning_rate": 3.078076923076923e-05, "loss": 0.2899, "step": 16500 }, { "epoch": 20.37, "eval_loss": 5.438948631286621, "eval_runtime": 34.4996, "eval_samples_per_second": 22.058, "eval_steps_per_second": 1.391, "eval_wer": 0.45992162451015317, "step": 16500 }, { "epoch": 22.22, "learning_rate": 2.885897435897436e-05, "loss": 0.2776, "step": 18000 }, { "epoch": 22.22, "eval_loss": 3.528372049331665, "eval_runtime": 35.9097, "eval_samples_per_second": 21.192, "eval_steps_per_second": 1.337, "eval_wer": 0.4536872105450659, "step": 18000 }, { "epoch": 24.07, "learning_rate": 2.6935897435897438e-05, "loss": 0.2574, "step": 19500 }, { "epoch": 24.07, "eval_loss": 2.1759419441223145, "eval_runtime": 34.313, "eval_samples_per_second": 22.178, "eval_steps_per_second": 1.399, "eval_wer": 0.464909155682223, "step": 19500 }, { "epoch": 25.93, "learning_rate": 2.5012820512820513e-05, "loss": 0.2378, "step": 21000 }, { "epoch": 25.93, "eval_loss": 3.390052080154419, "eval_runtime": 34.4471, "eval_samples_per_second": 22.092, "eval_steps_per_second": 1.393, "eval_wer": 0.4447809048806555, "step": 21000 }, { "epoch": 27.78, "learning_rate": 2.3092307692307694e-05, "loss": 0.217, "step": 22500 }, { "epoch": 27.78, "eval_loss": 1.163241982460022, "eval_runtime": 36.0254, "eval_samples_per_second": 21.124, "eval_steps_per_second": 1.332, "eval_wer": 0.45653722835767724, "step": 22500 }, { "epoch": 29.63, "learning_rate": 2.1169230769230768e-05, "loss": 0.2115, "step": 24000 }, { "epoch": 29.63, "eval_loss": 1.7441022396087646, "eval_runtime": 35.1297, "eval_samples_per_second": 21.663, "eval_steps_per_second": 1.366, "eval_wer": 0.42322764517278233, "step": 24000 }, { "epoch": 31.48, "learning_rate": 1.9246153846153846e-05, "loss": 0.1959, "step": 25500 }, { "epoch": 31.48, "eval_loss": 3.4991888999938965, "eval_runtime": 36.7374, "eval_samples_per_second": 20.715, "eval_steps_per_second": 1.307, "eval_wer": 0.4303526897043107, "step": 25500 }, { "epoch": 33.33, "learning_rate": 1.7323076923076924e-05, "loss": 0.187, "step": 27000 }, { "epoch": 33.33, "eval_loss": 3.6162784099578857, "eval_runtime": 34.8093, "eval_samples_per_second": 21.862, "eval_steps_per_second": 1.379, "eval_wer": 0.43694335589597433, "step": 27000 }, { "epoch": 35.19, "learning_rate": 1.540128205128205e-05, "loss": 0.1748, "step": 28500 }, { "epoch": 35.19, "eval_loss": 3.603774309158325, "eval_runtime": 35.9258, "eval_samples_per_second": 21.183, "eval_steps_per_second": 1.336, "eval_wer": 0.4467402921268258, "step": 28500 }, { "epoch": 37.04, "learning_rate": 1.347820512820513e-05, "loss": 0.17, "step": 30000 }, { "epoch": 37.04, "eval_loss": 2.970829486846924, "eval_runtime": 35.2981, "eval_samples_per_second": 21.559, "eval_steps_per_second": 1.36, "eval_wer": 0.43623085144282153, "step": 30000 }, { "epoch": 38.89, "learning_rate": 1.1557692307692308e-05, "loss": 0.159, "step": 31500 }, { "epoch": 38.89, "eval_loss": 3.2044625282287598, "eval_runtime": 34.6143, "eval_samples_per_second": 21.985, "eval_steps_per_second": 1.387, "eval_wer": 0.42785892411827575, "step": 31500 }, { "epoch": 40.74, "learning_rate": 9.635897435897436e-06, "loss": 0.153, "step": 33000 }, { "epoch": 40.74, "eval_loss": 3.2426888942718506, "eval_runtime": 35.08, "eval_samples_per_second": 21.693, "eval_steps_per_second": 1.368, "eval_wer": 0.42874955468471676, "step": 33000 }, { "epoch": 42.59, "learning_rate": 7.712820512820514e-06, "loss": 0.1463, "step": 34500 }, { "epoch": 42.59, "eval_loss": 3.5439305305480957, "eval_runtime": 36.6846, "eval_samples_per_second": 20.744, "eval_steps_per_second": 1.308, "eval_wer": 0.4269682935518347, "step": 34500 }, { "epoch": 44.44, "learning_rate": 5.78974358974359e-06, "loss": 0.139, "step": 36000 }, { "epoch": 44.44, "eval_loss": 3.938081741333008, "eval_runtime": 34.7219, "eval_samples_per_second": 21.917, "eval_steps_per_second": 1.382, "eval_wer": 0.41503384396152476, "step": 36000 }, { "epoch": 46.3, "learning_rate": 3.867948717948718e-06, "loss": 0.1352, "step": 37500 }, { "epoch": 46.3, "eval_loss": 4.174356937408447, "eval_runtime": 36.3496, "eval_samples_per_second": 20.936, "eval_steps_per_second": 1.321, "eval_wer": 0.4091556822230139, "step": 37500 }, { "epoch": 48.15, "learning_rate": 1.9461538461538464e-06, "loss": 0.1369, "step": 39000 }, { "epoch": 48.15, "eval_loss": 4.227924823760986, "eval_runtime": 34.5902, "eval_samples_per_second": 22.0, "eval_steps_per_second": 1.388, "eval_wer": 0.4153900961881012, "step": 39000 }, { "epoch": 50.0, "learning_rate": 2.3076923076923076e-08, "loss": 0.1273, "step": 40500 }, { "epoch": 50.0, "eval_loss": 4.169058322906494, "eval_runtime": 34.2804, "eval_samples_per_second": 22.199, "eval_steps_per_second": 1.4, "eval_wer": 0.4132525828286427, "step": 40500 }, { "epoch": 50.0, "step": 40500, "total_flos": 5.1003805267852526e+20, "train_loss": 0.0, "train_runtime": 69.3888, "train_samples_per_second": 3734.751, "train_steps_per_second": 116.733 } ], "max_steps": 8100, "num_train_epochs": 10, "total_flos": 5.1003805267852526e+20, "trial_name": null, "trial_params": null }