{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.2171581769436997, "eval_steps": 200, "global_step": 1800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 2.0000000000000003e-06, "loss": 2.9598, "step": 100 }, { "epoch": 0.36, "learning_rate": 4.000000000000001e-06, "loss": 2.3436, "step": 200 }, { "epoch": 0.36, "eval_loss": 1.8791261911392212, "eval_runtime": 1327.8534, "eval_samples_per_second": 0.753, "eval_steps_per_second": 0.047, "eval_wer": 0.8870904221802143, "step": 200 }, { "epoch": 0.54, "learning_rate": 6e-06, "loss": 1.5788, "step": 300 }, { "epoch": 0.71, "learning_rate": 8.000000000000001e-06, "loss": 1.1682, "step": 400 }, { "epoch": 0.71, "eval_loss": 1.0307379961013794, "eval_runtime": 1238.5009, "eval_samples_per_second": 0.807, "eval_steps_per_second": 0.051, "eval_wer": 0.5047652804032766, "step": 400 }, { "epoch": 0.89, "learning_rate": 1e-05, "loss": 0.9354, "step": 500 }, { "epoch": 1.07, "learning_rate": 9.987820251299121e-06, "loss": 0.7321, "step": 600 }, { "epoch": 1.07, "eval_loss": 0.6299881935119629, "eval_runtime": 830.7332, "eval_samples_per_second": 1.204, "eval_steps_per_second": 0.076, "eval_wer": 0.36645400126023947, "step": 600 }, { "epoch": 1.25, "learning_rate": 9.951340343707852e-06, "loss": 0.5397, "step": 700 }, { "epoch": 1.43, "learning_rate": 9.890738003669029e-06, "loss": 0.4564, "step": 800 }, { "epoch": 1.43, "eval_loss": 0.438092440366745, "eval_runtime": 723.2192, "eval_samples_per_second": 1.383, "eval_steps_per_second": 0.087, "eval_wer": 0.35148865784499056, "step": 800 }, { "epoch": 1.61, "learning_rate": 9.806308479691595e-06, "loss": 0.4291, "step": 900 }, { "epoch": 1.79, "learning_rate": 9.698463103929542e-06, "loss": 0.4095, "step": 1000 }, { "epoch": 1.79, "eval_loss": 0.40272918343544006, "eval_runtime": 636.5063, "eval_samples_per_second": 1.571, "eval_steps_per_second": 0.099, "eval_wer": 0.33297889098928796, "step": 1000 }, { "epoch": 1.97, "learning_rate": 9.567727288213005e-06, "loss": 0.3992, "step": 1100 }, { "epoch": 2.14, "learning_rate": 9.414737964294636e-06, "loss": 0.3813, "step": 1200 }, { "epoch": 2.14, "eval_loss": 0.3847169280052185, "eval_runtime": 616.7138, "eval_samples_per_second": 1.621, "eval_steps_per_second": 0.102, "eval_wer": 0.3359719596723377, "step": 1200 }, { "epoch": 2.32, "learning_rate": 9.24024048078213e-06, "loss": 0.3711, "step": 1300 }, { "epoch": 2.5, "learning_rate": 9.045084971874738e-06, "loss": 0.3667, "step": 1400 }, { "epoch": 2.5, "eval_loss": 0.37336310744285583, "eval_runtime": 614.3217, "eval_samples_per_second": 1.628, "eval_steps_per_second": 0.103, "eval_wer": 0.33916194076874606, "step": 1400 }, { "epoch": 2.68, "learning_rate": 8.83022221559489e-06, "loss": 0.3654, "step": 1500 }, { "epoch": 2.86, "learning_rate": 8.596699001693257e-06, "loss": 0.3583, "step": 1600 }, { "epoch": 2.86, "eval_loss": 0.3648846447467804, "eval_runtime": 617.5084, "eval_samples_per_second": 1.619, "eval_steps_per_second": 0.102, "eval_wer": 0.34904694391934465, "step": 1600 }, { "epoch": 3.04, "learning_rate": 8.345653031794292e-06, "loss": 0.353, "step": 1700 }, { "epoch": 3.22, "learning_rate": 8.078307376628292e-06, "loss": 0.3454, "step": 1800 }, { "epoch": 3.22, "eval_loss": 0.35879915952682495, "eval_runtime": 620.7465, "eval_samples_per_second": 1.611, "eval_steps_per_second": 0.101, "eval_wer": 0.35715973534971646, "step": 1800 } ], "logging_steps": 100, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 200, "total_flos": 6.712519290052608e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }