{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.579088471849866, "eval_steps": 200, "global_step": 4800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 2.0000000000000003e-06, "loss": 2.9598, "step": 100 }, { "epoch": 0.36, "learning_rate": 4.000000000000001e-06, "loss": 2.3436, "step": 200 }, { "epoch": 0.36, "eval_loss": 1.8791261911392212, "eval_runtime": 1327.8534, "eval_samples_per_second": 0.753, "eval_steps_per_second": 0.047, "eval_wer": 0.8870904221802143, "step": 200 }, { "epoch": 0.54, "learning_rate": 6e-06, "loss": 1.5788, "step": 300 }, { "epoch": 0.71, "learning_rate": 8.000000000000001e-06, "loss": 1.1682, "step": 400 }, { "epoch": 0.71, "eval_loss": 1.0307379961013794, "eval_runtime": 1238.5009, "eval_samples_per_second": 0.807, "eval_steps_per_second": 0.051, "eval_wer": 0.5047652804032766, "step": 400 }, { "epoch": 0.89, "learning_rate": 1e-05, "loss": 0.9354, "step": 500 }, { "epoch": 1.07, "learning_rate": 9.987820251299121e-06, "loss": 0.7321, "step": 600 }, { "epoch": 1.07, "eval_loss": 0.6299881935119629, "eval_runtime": 830.7332, "eval_samples_per_second": 1.204, "eval_steps_per_second": 0.076, "eval_wer": 0.36645400126023947, "step": 600 }, { "epoch": 1.25, "learning_rate": 9.951340343707852e-06, "loss": 0.5397, "step": 700 }, { "epoch": 1.43, "learning_rate": 9.890738003669029e-06, "loss": 0.4564, "step": 800 }, { "epoch": 1.43, "eval_loss": 0.438092440366745, "eval_runtime": 723.2192, "eval_samples_per_second": 1.383, "eval_steps_per_second": 0.087, "eval_wer": 0.35148865784499056, "step": 800 }, { "epoch": 1.61, "learning_rate": 9.806308479691595e-06, "loss": 0.4291, "step": 900 }, { "epoch": 1.79, "learning_rate": 9.698463103929542e-06, "loss": 0.4095, "step": 1000 }, { "epoch": 1.79, "eval_loss": 0.40272918343544006, "eval_runtime": 636.5063, "eval_samples_per_second": 1.571, "eval_steps_per_second": 0.099, "eval_wer": 0.33297889098928796, "step": 1000 }, { "epoch": 1.97, "learning_rate": 9.567727288213005e-06, "loss": 0.3992, "step": 1100 }, { "epoch": 2.14, "learning_rate": 9.414737964294636e-06, "loss": 0.3813, "step": 1200 }, { "epoch": 2.14, "eval_loss": 0.3847169280052185, "eval_runtime": 616.7138, "eval_samples_per_second": 1.621, "eval_steps_per_second": 0.102, "eval_wer": 0.3359719596723377, "step": 1200 }, { "epoch": 2.32, "learning_rate": 9.24024048078213e-06, "loss": 0.3711, "step": 1300 }, { "epoch": 2.5, "learning_rate": 9.045084971874738e-06, "loss": 0.3667, "step": 1400 }, { "epoch": 2.5, "eval_loss": 0.37336310744285583, "eval_runtime": 614.3217, "eval_samples_per_second": 1.628, "eval_steps_per_second": 0.103, "eval_wer": 0.33916194076874606, "step": 1400 }, { "epoch": 2.68, "learning_rate": 8.83022221559489e-06, "loss": 0.3654, "step": 1500 }, { "epoch": 2.86, "learning_rate": 8.596699001693257e-06, "loss": 0.3583, "step": 1600 }, { "epoch": 2.86, "eval_loss": 0.3648846447467804, "eval_runtime": 617.5084, "eval_samples_per_second": 1.619, "eval_steps_per_second": 0.102, "eval_wer": 0.34904694391934465, "step": 1600 }, { "epoch": 3.04, "learning_rate": 8.345653031794292e-06, "loss": 0.353, "step": 1700 }, { "epoch": 3.22, "learning_rate": 8.078307376628292e-06, "loss": 0.3454, "step": 1800 }, { "epoch": 3.22, "eval_loss": 0.35879915952682495, "eval_runtime": 620.7465, "eval_samples_per_second": 1.611, "eval_steps_per_second": 0.101, "eval_wer": 0.35715973534971646, "step": 1800 }, { "epoch": 3.4, "learning_rate": 7.795964517353734e-06, "loss": 0.3399, "step": 1900 }, { "epoch": 3.57, "learning_rate": 7.500000000000001e-06, "loss": 0.3422, "step": 2000 }, { "epoch": 3.57, "eval_loss": 0.3536700904369354, "eval_runtime": 625.9316, "eval_samples_per_second": 1.598, "eval_steps_per_second": 0.101, "eval_wer": 0.3704710144927536, "step": 2000 }, { "epoch": 3.75, "learning_rate": 7.191855733945388e-06, "loss": 0.3355, "step": 2100 }, { "epoch": 3.93, "learning_rate": 6.873032967079562e-06, "loss": 0.3371, "step": 2200 }, { "epoch": 3.93, "eval_loss": 0.3503468930721283, "eval_runtime": 634.2679, "eval_samples_per_second": 1.577, "eval_steps_per_second": 0.099, "eval_wer": 0.3811436672967864, "step": 2200 }, { "epoch": 4.11, "learning_rate": 6.545084971874738e-06, "loss": 0.3311, "step": 2300 }, { "epoch": 4.29, "learning_rate": 6.209609477998339e-06, "loss": 0.3291, "step": 2400 }, { "epoch": 4.29, "eval_loss": 0.347513347864151, "eval_runtime": 615.3576, "eval_samples_per_second": 1.625, "eval_steps_per_second": 0.102, "eval_wer": 0.3677930056710775, "step": 2400 }, { "epoch": 4.47, "learning_rate": 5.8682408883346535e-06, "loss": 0.3273, "step": 2500 }, { "epoch": 4.65, "learning_rate": 5.522642316338268e-06, "loss": 0.324, "step": 2600 }, { "epoch": 4.65, "eval_loss": 0.3451197147369385, "eval_runtime": 628.249, "eval_samples_per_second": 1.592, "eval_steps_per_second": 0.1, "eval_wer": 0.36700535601764334, "step": 2600 }, { "epoch": 4.83, "learning_rate": 5.174497483512506e-06, "loss": 0.3219, "step": 2700 }, { "epoch": 5.0, "learning_rate": 4.825502516487497e-06, "loss": 0.3262, "step": 2800 }, { "epoch": 5.0, "eval_loss": 0.34306031465530396, "eval_runtime": 613.0254, "eval_samples_per_second": 1.631, "eval_steps_per_second": 0.103, "eval_wer": 0.3710223692501575, "step": 2800 }, { "epoch": 5.18, "learning_rate": 4.477357683661734e-06, "loss": 0.3203, "step": 2900 }, { "epoch": 5.36, "learning_rate": 4.131759111665349e-06, "loss": 0.3168, "step": 3000 }, { "epoch": 5.36, "eval_loss": 0.34187400341033936, "eval_runtime": 628.864, "eval_samples_per_second": 1.59, "eval_steps_per_second": 0.1, "eval_wer": 0.3847274732199118, "step": 3000 }, { "epoch": 5.54, "learning_rate": 3.790390522001662e-06, "loss": 0.3144, "step": 3100 }, { "epoch": 5.72, "learning_rate": 3.4549150281252635e-06, "loss": 0.3178, "step": 3200 }, { "epoch": 5.72, "eval_loss": 0.34061843156814575, "eval_runtime": 627.3787, "eval_samples_per_second": 1.594, "eval_steps_per_second": 0.1, "eval_wer": 0.3832703213610586, "step": 3200 }, { "epoch": 5.9, "learning_rate": 3.12696703292044e-06, "loss": 0.3127, "step": 3300 }, { "epoch": 6.08, "learning_rate": 2.8081442660546126e-06, "loss": 0.3136, "step": 3400 }, { "epoch": 6.08, "eval_loss": 0.34004053473472595, "eval_runtime": 597.4949, "eval_samples_per_second": 1.674, "eval_steps_per_second": 0.105, "eval_wer": 0.3853182104599874, "step": 3400 }, { "epoch": 6.26, "learning_rate": 2.5000000000000015e-06, "loss": 0.3126, "step": 3500 }, { "epoch": 6.43, "learning_rate": 2.204035482646267e-06, "loss": 0.3092, "step": 3600 }, { "epoch": 6.43, "eval_loss": 0.3392544984817505, "eval_runtime": 603.3789, "eval_samples_per_second": 1.657, "eval_steps_per_second": 0.104, "eval_wer": 0.38961090107120355, "step": 3600 }, { "epoch": 6.61, "learning_rate": 1.9216926233717087e-06, "loss": 0.3135, "step": 3700 }, { "epoch": 6.79, "learning_rate": 1.6543469682057105e-06, "loss": 0.3106, "step": 3800 }, { "epoch": 6.79, "eval_loss": 0.33891087770462036, "eval_runtime": 587.9522, "eval_samples_per_second": 1.701, "eval_steps_per_second": 0.107, "eval_wer": 0.3900047258979206, "step": 3800 }, { "epoch": 6.97, "learning_rate": 1.4033009983067454e-06, "loss": 0.3094, "step": 3900 }, { "epoch": 7.15, "learning_rate": 1.1697777844051105e-06, "loss": 0.3057, "step": 4000 }, { "epoch": 7.15, "eval_loss": 0.33877310156822205, "eval_runtime": 590.6741, "eval_samples_per_second": 1.693, "eval_steps_per_second": 0.107, "eval_wer": 0.38031663516068054, "step": 4000 }, { "epoch": 7.33, "learning_rate": 9.549150281252633e-07, "loss": 0.3079, "step": 4100 }, { "epoch": 7.51, "learning_rate": 7.597595192178702e-07, "loss": 0.3087, "step": 4200 }, { "epoch": 7.51, "eval_loss": 0.33828216791152954, "eval_runtime": 586.9517, "eval_samples_per_second": 1.704, "eval_steps_per_second": 0.107, "eval_wer": 0.39406112161310647, "step": 4200 }, { "epoch": 7.69, "learning_rate": 5.852620357053651e-07, "loss": 0.3082, "step": 4300 }, { "epoch": 7.86, "learning_rate": 4.322727117869951e-07, "loss": 0.308, "step": 4400 }, { "epoch": 7.86, "eval_loss": 0.3381615877151489, "eval_runtime": 590.3123, "eval_samples_per_second": 1.694, "eval_steps_per_second": 0.107, "eval_wer": 0.3873660995589162, "step": 4400 }, { "epoch": 8.04, "learning_rate": 3.015368960704584e-07, "loss": 0.3091, "step": 4500 }, { "epoch": 8.22, "learning_rate": 1.9369152030840553e-07, "loss": 0.3036, "step": 4600 }, { "epoch": 8.22, "eval_loss": 0.33812272548675537, "eval_runtime": 590.6229, "eval_samples_per_second": 1.693, "eval_steps_per_second": 0.107, "eval_wer": 0.38961090107120355, "step": 4600 }, { "epoch": 8.4, "learning_rate": 1.0926199633097156e-07, "loss": 0.3049, "step": 4700 }, { "epoch": 8.58, "learning_rate": 4.865965629214819e-08, "loss": 0.3087, "step": 4800 }, { "epoch": 8.58, "eval_loss": 0.338046669960022, "eval_runtime": 593.4841, "eval_samples_per_second": 1.685, "eval_steps_per_second": 0.106, "eval_wer": 0.3909892879647133, "step": 4800 } ], "logging_steps": 100, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 200, "total_flos": 1.7900051440140288e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }