{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 500, "global_step": 2172, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_gen_len": 11.021739130434783, "eval_loss": 5.008267879486084, "eval_rouge1": 29.3556, "eval_rouge2": 23.1164, "eval_rougeL": 26.8076, "eval_rougeLsum": 26.8263, "eval_runtime": 173.025, "eval_samples_per_second": 1.861, "eval_steps_per_second": 0.237, "step": 362 }, { "epoch": 1.38, "learning_rate": 3e-05, "loss": 19.6995, "step": 500 }, { "epoch": 2.0, "eval_gen_len": 10.335403726708075, "eval_loss": 0.8298134207725525, "eval_rouge1": 32.0346, "eval_rouge2": 24.9074, "eval_rougeL": 28.9355, "eval_rougeLsum": 29.0842, "eval_runtime": 150.6955, "eval_samples_per_second": 2.137, "eval_steps_per_second": 0.272, "step": 724 }, { "epoch": 2.76, "learning_rate": 2.5192307692307694e-05, "loss": 0.9076, "step": 1000 }, { "epoch": 3.0, "eval_gen_len": 10.863354037267081, "eval_loss": 0.7763211131095886, "eval_rouge1": 31.1555, "eval_rouge2": 23.6164, "eval_rougeL": 27.9825, "eval_rougeLsum": 28.1607, "eval_runtime": 162.2266, "eval_samples_per_second": 1.985, "eval_steps_per_second": 0.253, "step": 1086 }, { "epoch": 4.0, "eval_gen_len": 12.23913043478261, "eval_loss": 0.7397241592407227, "eval_rouge1": 28.3909, "eval_rouge2": 20.1283, "eval_rougeL": 24.9491, "eval_rougeLsum": 25.0633, "eval_runtime": 160.7559, "eval_samples_per_second": 2.003, "eval_steps_per_second": 0.255, "step": 1448 }, { "epoch": 4.14, "learning_rate": 2.0384615384615387e-05, "loss": 0.8095, "step": 1500 }, { "epoch": 5.0, "eval_gen_len": 11.658385093167702, "eval_loss": 0.7186636328697205, "eval_rouge1": 30.3081, "eval_rouge2": 22.0426, "eval_rougeL": 26.8549, "eval_rougeLsum": 27.0402, "eval_runtime": 161.9975, "eval_samples_per_second": 1.988, "eval_steps_per_second": 0.253, "step": 1810 }, { "epoch": 5.52, "learning_rate": 1.557692307692308e-05, "loss": 0.7683, "step": 2000 }, { "epoch": 6.0, "eval_gen_len": 11.70807453416149, "eval_loss": 0.7056237459182739, "eval_rouge1": 30.3901, "eval_rouge2": 21.9556, "eval_rougeL": 26.8053, "eval_rougeLsum": 27.0162, "eval_runtime": 161.96, "eval_samples_per_second": 1.988, "eval_steps_per_second": 0.253, "step": 2172 } ], "logging_steps": 500, "max_steps": 3620, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.164912412852224e+16, "trial_name": null, "trial_params": null }