{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4367816091954024, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 2.7599999999999997e-05, "loss": 10.9066, "step": 50 }, { "epoch": 0.14, "learning_rate": 5.6399999999999995e-05, "loss": 7.3263, "step": 100 }, { "epoch": 0.14, "eval_cer": 0.9998702550415184, "eval_loss": 5.000309467315674, "eval_runtime": 31.7393, "eval_samples_per_second": 14.399, "eval_steps_per_second": 0.914, "eval_wer": 1.0, "step": 100 }, { "epoch": 0.22, "learning_rate": 8.639999999999999e-05, "loss": 3.4967, "step": 150 }, { "epoch": 0.29, "learning_rate": 0.0001164, "loss": 3.1464, "step": 200 }, { "epoch": 0.29, "eval_cer": 0.9998702550415184, "eval_loss": 3.5314910411834717, "eval_runtime": 31.3912, "eval_samples_per_second": 14.558, "eval_steps_per_second": 0.924, "eval_wer": 1.0, "step": 200 }, { "epoch": 0.36, "learning_rate": 0.00014639999999999998, "loss": 3.1238, "step": 250 }, { "epoch": 0.43, "learning_rate": 0.00017639999999999998, "loss": 3.1152, "step": 300 }, { "epoch": 0.43, "eval_cer": 0.9998702550415184, "eval_loss": 3.6040682792663574, "eval_runtime": 31.267, "eval_samples_per_second": 14.616, "eval_steps_per_second": 0.927, "eval_wer": 1.0, "step": 300 }, { "epoch": 0.5, "learning_rate": 0.00020639999999999998, "loss": 3.088, "step": 350 }, { "epoch": 0.57, "learning_rate": 0.0002364, "loss": 3.0626, "step": 400 }, { "epoch": 0.57, "eval_cer": 0.9743290332147093, "eval_loss": 3.385751247406006, "eval_runtime": 31.1188, "eval_samples_per_second": 14.686, "eval_steps_per_second": 0.932, "eval_wer": 0.9992471013401596, "step": 400 }, { "epoch": 0.65, "learning_rate": 0.00026639999999999997, "loss": 3.032, "step": 450 }, { "epoch": 0.72, "learning_rate": 0.0002964, "loss": 2.3387, "step": 500 }, { "epoch": 0.72, "eval_cer": 0.3497553380782918, "eval_loss": 1.502172589302063, "eval_runtime": 31.3947, "eval_samples_per_second": 14.557, "eval_steps_per_second": 0.924, "eval_wer": 1.0904984189128144, "step": 500 }, { "epoch": 0.79, "learning_rate": 0.00029168765743073046, "loss": 1.1185, "step": 550 }, { "epoch": 0.86, "learning_rate": 0.0002822418136020151, "loss": 0.7737, "step": 600 }, { "epoch": 0.86, "eval_cer": 0.21517274614472123, "eval_loss": 0.6938613057136536, "eval_runtime": 31.1722, "eval_samples_per_second": 14.661, "eval_steps_per_second": 0.93, "eval_wer": 0.872910706218943, "step": 600 }, { "epoch": 0.93, "learning_rate": 0.00027279596977329974, "loss": 0.647, "step": 650 }, { "epoch": 1.01, "learning_rate": 0.00026335012594458433, "loss": 0.5643, "step": 700 }, { "epoch": 1.01, "eval_cer": 0.15467452550415184, "eval_loss": 0.4780799448490143, "eval_runtime": 30.8612, "eval_samples_per_second": 14.808, "eval_steps_per_second": 0.94, "eval_wer": 0.7375395271796417, "step": 700 }, { "epoch": 1.08, "learning_rate": 0.00025390428211586897, "loss": 0.5188, "step": 750 }, { "epoch": 1.15, "learning_rate": 0.0002444584382871536, "loss": 0.4762, "step": 800 }, { "epoch": 1.15, "eval_cer": 0.1396055753262159, "eval_loss": 0.41990911960601807, "eval_runtime": 31.306, "eval_samples_per_second": 14.598, "eval_steps_per_second": 0.926, "eval_wer": 0.6905586508056015, "step": 800 }, { "epoch": 1.22, "learning_rate": 0.00023501259445843828, "loss": 0.4614, "step": 850 }, { "epoch": 1.29, "learning_rate": 0.0002255667506297229, "loss": 0.4371, "step": 900 }, { "epoch": 1.29, "eval_cer": 0.12713152431791222, "eval_loss": 0.382140189409256, "eval_runtime": 31.4276, "eval_samples_per_second": 14.541, "eval_steps_per_second": 0.923, "eval_wer": 0.6390603824725192, "step": 900 }, { "epoch": 1.36, "learning_rate": 0.00021612090680100753, "loss": 0.4259, "step": 950 }, { "epoch": 1.44, "learning_rate": 0.00020667506297229217, "loss": 0.4138, "step": 1000 }, { "epoch": 1.44, "eval_cer": 0.12171930604982206, "eval_loss": 0.36281564831733704, "eval_runtime": 31.2629, "eval_samples_per_second": 14.618, "eval_steps_per_second": 0.928, "eval_wer": 0.6143653064297545, "step": 1000 } ], "max_steps": 2088, "num_train_epochs": 3, "total_flos": 8.198176189032398e+18, "trial_name": null, "trial_params": null }