{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 8915, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.905051827430725, "learning_rate": 4e-05, "loss": 1.3911, "step": 1783 }, { "epoch": 1.0, "eval_gen_len": 1.0, "eval_loss": 0.6610555052757263, "eval_rouge1": 0.4074, "eval_rouge2": 0.0, "eval_rougeL": 0.4062, "eval_rougeLsum": 0.4066, "eval_runtime": 114.1926, "eval_samples_per_second": 6.559, "eval_steps_per_second": 0.21, "step": 1783 }, { "epoch": 2.0, "grad_norm": 1.241516351699829, "learning_rate": 3e-05, "loss": 0.8526, "step": 3566 }, { "epoch": 2.0, "eval_gen_len": 1.0, "eval_loss": 0.6166712045669556, "eval_rouge1": 0.5108, "eval_rouge2": 0.0, "eval_rougeL": 0.515, "eval_rougeLsum": 0.51, "eval_runtime": 113.6094, "eval_samples_per_second": 6.593, "eval_steps_per_second": 0.211, "step": 3566 }, { "epoch": 3.0, "grad_norm": 1.1109647750854492, "learning_rate": 2e-05, "loss": 0.7928, "step": 5349 }, { "epoch": 3.0, "eval_gen_len": 1.0, "eval_loss": 0.5968391299247742, "eval_rouge1": 0.4966, "eval_rouge2": 0.0, "eval_rougeL": 0.5042, "eval_rougeLsum": 0.4969, "eval_runtime": 114.0147, "eval_samples_per_second": 6.569, "eval_steps_per_second": 0.21, "step": 5349 }, { "epoch": 4.0, "grad_norm": 1.4407908916473389, "learning_rate": 1e-05, "loss": 0.7651, "step": 7132 }, { "epoch": 4.0, "eval_gen_len": 1.0, "eval_loss": 0.5860394835472107, "eval_rouge1": 0.5171, "eval_rouge2": 0.0, "eval_rougeL": 0.5228, "eval_rougeLsum": 0.5173, "eval_runtime": 114.1886, "eval_samples_per_second": 6.559, "eval_steps_per_second": 0.21, "step": 7132 }, { "epoch": 5.0, "grad_norm": 1.6015982627868652, "learning_rate": 0.0, "loss": 0.7528, "step": 8915 }, { "epoch": 5.0, "eval_gen_len": 1.0, "eval_loss": 0.5838220119476318, "eval_rouge1": 0.5174, "eval_rouge2": 0.0, "eval_rougeL": 0.523, "eval_rougeLsum": 0.5165, "eval_runtime": 113.9414, "eval_samples_per_second": 6.574, "eval_steps_per_second": 0.211, "step": 8915 }, { "epoch": 5.0, "step": 8915, "total_flos": 7.466029945887744e+16, "train_loss": 0.9108704058293606, "train_runtime": 5154.4687, "train_samples_per_second": 13.836, "train_steps_per_second": 1.73 } ], "logging_steps": 500, "max_steps": 8915, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 7.466029945887744e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }