{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.997824510514866, "global_step": 3440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "learning_rate": 5e-05, "loss": 7.294, "step": 100 }, { "epoch": 0.58, "learning_rate": 0.0001, "loss": 3.2282, "step": 200 }, { "epoch": 0.87, "learning_rate": 0.00015, "loss": 3.0143, "step": 300 }, { "epoch": 1.16, "learning_rate": 0.0002, "loss": 2.9501, "step": 400 }, { "epoch": 1.45, "learning_rate": 0.00025, "loss": 2.6683, "step": 500 }, { "epoch": 1.45, "eval_loss": 1.7697819471359253, "eval_runtime": 250.7581, "eval_samples_per_second": 18.424, "eval_steps_per_second": 4.606, "eval_wer": 1.0040690203756961, "step": 500 }, { "epoch": 1.74, "learning_rate": 0.0003, "loss": 2.1972, "step": 600 }, { "epoch": 2.03, "learning_rate": 0.00035, "loss": 2.0775, "step": 700 }, { "epoch": 2.32, "learning_rate": 0.0004, "loss": 1.9871, "step": 800 }, { "epoch": 2.61, "learning_rate": 0.00045000000000000004, "loss": 1.9768, "step": 900 }, { "epoch": 2.91, "learning_rate": 0.0005, "loss": 1.9548, "step": 1000 }, { "epoch": 2.91, "eval_loss": 1.0889918804168701, "eval_runtime": 243.6531, "eval_samples_per_second": 18.961, "eval_steps_per_second": 4.74, "eval_wer": 0.8601847885945053, "step": 1000 }, { "epoch": 3.2, "learning_rate": 0.00055, "loss": 1.9588, "step": 1100 }, { "epoch": 3.49, "learning_rate": 0.0006, "loss": 1.9734, "step": 1200 }, { "epoch": 3.78, "learning_rate": 0.0006495, "loss": 1.9725, "step": 1300 }, { "epoch": 4.07, "learning_rate": 0.0006995, "loss": 1.9717, "step": 1400 }, { "epoch": 4.36, "learning_rate": 0.0007495000000000001, "loss": 1.9568, "step": 1500 }, { "epoch": 4.36, "eval_loss": 1.0877875089645386, "eval_runtime": 239.5705, "eval_samples_per_second": 19.285, "eval_steps_per_second": 4.821, "eval_wer": 0.868016887964266, "step": 1500 }, { "epoch": 4.65, "learning_rate": 0.0007995, "loss": 1.9528, "step": 1600 }, { "epoch": 4.94, "learning_rate": 0.0008495000000000001, "loss": 1.9862, "step": 1700 }, { "epoch": 5.23, "learning_rate": 0.0008995, "loss": 1.9592, "step": 1800 }, { "epoch": 5.52, "learning_rate": 0.0009495, "loss": 1.938, "step": 1900 }, { "epoch": 5.81, "learning_rate": 0.0009995000000000002, "loss": 1.9497, "step": 2000 }, { "epoch": 5.81, "eval_loss": 1.1500531435012817, "eval_runtime": 242.7035, "eval_samples_per_second": 19.036, "eval_steps_per_second": 4.759, "eval_wer": 0.8837728691182769, "step": 2000 }, { "epoch": 6.1, "learning_rate": 0.00093125, "loss": 1.9326, "step": 2100 }, { "epoch": 6.39, "learning_rate": 0.0008618055555555557, "loss": 1.8927, "step": 2200 }, { "epoch": 6.68, "learning_rate": 0.0007923611111111111, "loss": 1.8929, "step": 2300 }, { "epoch": 6.97, "learning_rate": 0.0007229166666666666, "loss": 1.8684, "step": 2400 }, { "epoch": 7.27, "learning_rate": 0.0006534722222222223, "loss": 1.8453, "step": 2500 }, { "epoch": 7.27, "eval_loss": 1.0452075004577637, "eval_runtime": 247.1245, "eval_samples_per_second": 18.695, "eval_steps_per_second": 4.674, "eval_wer": 0.8417977115584654, "step": 2500 }, { "epoch": 7.56, "learning_rate": 0.0005840277777777778, "loss": 1.8081, "step": 2600 }, { "epoch": 7.85, "learning_rate": 0.0005145833333333333, "loss": 1.78, "step": 2700 }, { "epoch": 8.14, "learning_rate": 0.00044513888888888885, "loss": 1.7618, "step": 2800 }, { "epoch": 8.43, "learning_rate": 0.00037569444444444445, "loss": 1.7155, "step": 2900 }, { "epoch": 8.72, "learning_rate": 0.00030625000000000004, "loss": 1.6952, "step": 3000 }, { "epoch": 8.72, "eval_loss": 0.9152895212173462, "eval_runtime": 242.409, "eval_samples_per_second": 19.059, "eval_steps_per_second": 4.765, "eval_wer": 0.7822615186930184, "step": 3000 }, { "epoch": 9.01, "learning_rate": 0.00023680555555555556, "loss": 1.6874, "step": 3100 }, { "epoch": 9.3, "learning_rate": 0.0001673611111111111, "loss": 1.6143, "step": 3200 }, { "epoch": 9.59, "learning_rate": 9.791666666666667e-05, "loss": 1.5948, "step": 3300 }, { "epoch": 9.88, "learning_rate": 2.9166666666666666e-05, "loss": 1.5723, "step": 3400 }, { "epoch": 10.0, "step": 3440, "total_flos": 1.01153307245466e+19, "train_loss": 2.1586562755496, "train_runtime": 12185.0109, "train_samples_per_second": 9.052, "train_steps_per_second": 0.282 } ], "max_steps": 3440, "num_train_epochs": 10, "total_flos": 1.01153307245466e+19, "trial_name": null, "trial_params": null }