{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 8140, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_runtime": 5.9123, "eval_samples_per_second": 84.4, "eval_steps_per_second": 10.656, "step": 407 }, { "epoch": 1.23, "learning_rate": 1.877886977886978e-05, "loss": 0.3917, "step": 500 }, { "epoch": 2.0, "eval_runtime": 5.883, "eval_samples_per_second": 84.821, "eval_steps_per_second": 10.709, "step": 814 }, { "epoch": 2.46, "learning_rate": 1.7552825552825554e-05, "loss": 0.191, "step": 1000 }, { "epoch": 3.0, "eval_runtime": 5.8655, "eval_samples_per_second": 85.073, "eval_steps_per_second": 10.741, "step": 1221 }, { "epoch": 3.69, "learning_rate": 1.6324324324324326e-05, "loss": 0.1213, "step": 1500 }, { "epoch": 4.0, "eval_runtime": 5.8538, "eval_samples_per_second": 85.244, "eval_steps_per_second": 10.762, "step": 1628 }, { "epoch": 4.91, "learning_rate": 1.5095823095823097e-05, "loss": 0.0805, "step": 2000 }, { "epoch": 5.0, "eval_runtime": 5.8461, "eval_samples_per_second": 85.356, "eval_steps_per_second": 10.776, "step": 2035 }, { "epoch": 6.0, "eval_runtime": 5.8536, "eval_samples_per_second": 85.246, "eval_steps_per_second": 10.763, "step": 2442 }, { "epoch": 6.14, "learning_rate": 1.3867321867321867e-05, "loss": 0.0494, "step": 2500 }, { "epoch": 7.0, "eval_runtime": 5.8462, "eval_samples_per_second": 85.354, "eval_steps_per_second": 10.776, "step": 2849 }, { "epoch": 7.37, "learning_rate": 1.2641277641277642e-05, "loss": 0.0343, "step": 3000 }, { "epoch": 8.0, "eval_runtime": 5.8523, "eval_samples_per_second": 85.265, "eval_steps_per_second": 10.765, "step": 3256 }, { "epoch": 8.6, "learning_rate": 1.1412776412776414e-05, "loss": 0.0308, "step": 3500 }, { "epoch": 9.0, "eval_runtime": 5.8555, "eval_samples_per_second": 85.22, "eval_steps_per_second": 10.759, "step": 3663 }, { "epoch": 9.83, "learning_rate": 1.0184275184275186e-05, "loss": 0.0262, "step": 4000 }, { "epoch": 10.0, "eval_runtime": 5.8564, "eval_samples_per_second": 85.207, "eval_steps_per_second": 10.758, "step": 4070 }, { "epoch": 11.0, "eval_runtime": 5.8649, "eval_samples_per_second": 85.082, "eval_steps_per_second": 10.742, "step": 4477 }, { "epoch": 11.06, "learning_rate": 8.955773955773957e-06, "loss": 0.0249, "step": 4500 }, { "epoch": 12.0, "eval_runtime": 5.8497, "eval_samples_per_second": 85.303, "eval_steps_per_second": 10.77, "step": 4884 }, { "epoch": 12.29, "learning_rate": 7.727272727272727e-06, "loss": 0.0122, "step": 5000 }, { "epoch": 13.0, "eval_runtime": 5.8618, "eval_samples_per_second": 85.127, "eval_steps_per_second": 10.748, "step": 5291 }, { "epoch": 13.51, "learning_rate": 6.501228501228501e-06, "loss": 0.0137, "step": 5500 }, { "epoch": 14.0, "eval_runtime": 5.8612, "eval_samples_per_second": 85.136, "eval_steps_per_second": 10.749, "step": 5698 }, { "epoch": 14.74, "learning_rate": 5.272727272727273e-06, "loss": 0.0071, "step": 6000 }, { "epoch": 15.0, "eval_runtime": 5.8722, "eval_samples_per_second": 84.977, "eval_steps_per_second": 10.729, "step": 6105 }, { "epoch": 15.97, "learning_rate": 4.0442260442260445e-06, "loss": 0.0094, "step": 6500 }, { "epoch": 16.0, "eval_runtime": 5.8761, "eval_samples_per_second": 84.92, "eval_steps_per_second": 10.721, "step": 6512 }, { "epoch": 17.0, "eval_runtime": 5.8601, "eval_samples_per_second": 85.153, "eval_steps_per_second": 10.751, "step": 6919 }, { "epoch": 17.2, "learning_rate": 2.8157248157248157e-06, "loss": 0.0094, "step": 7000 }, { "epoch": 18.0, "eval_runtime": 5.8528, "eval_samples_per_second": 85.258, "eval_steps_per_second": 10.764, "step": 7326 }, { "epoch": 18.43, "learning_rate": 1.5872235872235874e-06, "loss": 0.0072, "step": 7500 }, { "epoch": 19.0, "eval_runtime": 5.8548, "eval_samples_per_second": 85.23, "eval_steps_per_second": 10.76, "step": 7733 }, { "epoch": 19.66, "learning_rate": 3.587223587223587e-07, "loss": 0.0065, "step": 8000 }, { "epoch": 20.0, "eval_runtime": 5.8599, "eval_samples_per_second": 85.155, "eval_steps_per_second": 10.751, "step": 8140 } ], "max_steps": 8140, "num_train_epochs": 20, "total_flos": 3.403128959729664e+16, "trial_name": null, "trial_params": null }