{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.880770180202418, "global_step": 2500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003949642063687978, "learning_rate": 5.000000000000001e-07, "loss": 25.19717788696289, "step": 1 }, { "epoch": 0.9992594421130585, "eval_gen_len": 16.6, "eval_loss": 13.611336708068848, "eval_rouge1": 9.8775, "eval_rouge2": 2.958, "eval_rougeL": 8.7847, "eval_rougeLsum": 8.871, "eval_runtime": 220.704, "eval_samples_per_second": 4.585, "step": 253 }, { "epoch": 1.9755615897309307, "learning_rate": 4.176954732510288e-05, "loss": 13.314938470691382, "step": 500 }, { "epoch": 1.9992594421130585, "eval_gen_len": 13.0, "eval_loss": 7.852785110473633, "eval_rouge1": 10.5151, "eval_rouge2": 3.2488, "eval_rougeL": 9.5196, "eval_rougeLsum": 9.5659, "eval_runtime": 203.1626, "eval_samples_per_second": 4.981, "step": 506 }, { "epoch": 2.9992594421130585, "eval_gen_len": 5.3, "eval_loss": 5.0542707443237305, "eval_rouge1": 3.3057, "eval_rouge2": 1.0908, "eval_rougeL": 2.9248, "eval_rougeLsum": 2.906, "eval_runtime": 191.681, "eval_samples_per_second": 5.28, "step": 759 }, { "epoch": 3.951863737348803, "learning_rate": 3.148148148148148e-05, "loss": 6.2216669921875, "step": 1000 }, { "epoch": 3.9992594421130585, "eval_gen_len": 14.8, "eval_loss": 4.541400909423828, "eval_rouge1": 15.0004, "eval_rouge2": 5.7011, "eval_rougeL": 13.2283, "eval_rougeLsum": 13.2086, "eval_runtime": 197.5356, "eval_samples_per_second": 5.123, "step": 1012 }, { "epoch": 4.9992594421130585, "eval_gen_len": 16.3, "eval_loss": 4.135443687438965, "eval_rouge1": 17.3311, "eval_rouge2": 6.3922, "eval_rougeL": 15.3676, "eval_rougeLsum": 15.3563, "eval_runtime": 200.4736, "eval_samples_per_second": 5.048, "step": 1265 }, { "epoch": 5.928165884966675, "learning_rate": 2.1193415637860082e-05, "loss": 4.99077294921875, "step": 1500 }, { "epoch": 5.9992594421130585, "eval_gen_len": 17.1, "eval_loss": 3.8587934970855713, "eval_rouge1": 18.2338, "eval_rouge2": 6.8747, "eval_rougeL": 16.1719, "eval_rougeLsum": 16.1563, "eval_runtime": 201.2411, "eval_samples_per_second": 5.029, "step": 1518 }, { "epoch": 6.9992594421130585, "eval_gen_len": 17.4, "eval_loss": 3.6997532844543457, "eval_rouge1": 19.4383, "eval_rouge2": 7.6786, "eval_rougeL": 17.1778, "eval_rougeLsum": 17.1963, "eval_runtime": 200.0236, "eval_samples_per_second": 5.059, "step": 1771 }, { "epoch": 7.904468032584547, "learning_rate": 1.0905349794238684e-05, "loss": 4.552568359375, "step": 2000 }, { "epoch": 7.9992594421130585, "eval_gen_len": 17.6, "eval_loss": 3.6233103275299072, "eval_rouge1": 20.0385, "eval_rouge2": 7.9683, "eval_rougeL": 17.6542, "eval_rougeLsum": 17.6752, "eval_runtime": 201.3719, "eval_samples_per_second": 5.026, "step": 2024 }, { "epoch": 8.999259442113058, "eval_gen_len": 17.7, "eval_loss": 3.5823326110839844, "eval_rouge1": 20.504, "eval_rouge2": 8.2176, "eval_rougeL": 18.0877, "eval_rougeLsum": 18.082, "eval_runtime": 199.3661, "eval_samples_per_second": 5.076, "step": 2277 }, { "epoch": 9.880770180202418, "learning_rate": 6.17283950617284e-07, "loss": 4.4207666015625, "step": 2500 } ], "max_steps": 2530, "num_train_epochs": 10, "total_flos": 63945902037307392, "trial_name": null, "trial_params": null }