{ "best_metric": 71.4437, "best_model_checkpoint": "bin/indosum-base-3/checkpoint-4460", "epoch": 5.0, "eval_steps": 500, "global_step": 4460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.0639431476593018, "learning_rate": 0.0008, "loss": 1.1937, "step": 892 }, { "epoch": 1.0, "eval_gen_len": 90.984, "eval_loss": 0.8369650840759277, "eval_rouge1": 64.7751, "eval_rouge2": 56.7545, "eval_rougeL": 61.4956, "eval_rougeLsum": 63.81, "eval_runtime": 190.626, "eval_samples_per_second": 3.934, "eval_steps_per_second": 0.126, "step": 892 }, { "epoch": 2.0, "grad_norm": 0.4769066572189331, "learning_rate": 0.0006, "loss": 0.6828, "step": 1784 }, { "epoch": 2.0, "eval_gen_len": 101.11733333333333, "eval_loss": 0.6911363005638123, "eval_rouge1": 69.9628, "eval_rouge2": 62.6338, "eval_rougeL": 66.8253, "eval_rougeLsum": 69.0763, "eval_runtime": 191.8081, "eval_samples_per_second": 3.91, "eval_steps_per_second": 0.125, "step": 1784 }, { "epoch": 3.0, "grad_norm": 0.5286809802055359, "learning_rate": 0.0004, "loss": 0.4847, "step": 2676 }, { "epoch": 3.0, "eval_gen_len": 95.61333333333333, "eval_loss": 0.6692180037498474, "eval_rouge1": 69.9807, "eval_rouge2": 62.5614, "eval_rougeL": 66.7619, "eval_rougeLsum": 69.0683, "eval_runtime": 188.7212, "eval_samples_per_second": 3.974, "eval_steps_per_second": 0.127, "step": 2676 }, { "epoch": 4.0, "grad_norm": 0.3024337887763977, "learning_rate": 0.0002, "loss": 0.3348, "step": 3568 }, { "epoch": 4.0, "eval_gen_len": 100.15466666666667, "eval_loss": 0.7028738856315613, "eval_rouge1": 70.9247, "eval_rouge2": 63.6191, "eval_rougeL": 67.7749, "eval_rougeLsum": 70.0079, "eval_runtime": 191.1263, "eval_samples_per_second": 3.924, "eval_steps_per_second": 0.126, "step": 3568 }, { "epoch": 5.0, "grad_norm": 0.5054429173469543, "learning_rate": 0.0, "loss": 0.1988, "step": 4460 }, { "epoch": 5.0, "eval_gen_len": 98.66666666666667, "eval_loss": 0.7689540982246399, "eval_rouge1": 71.4437, "eval_rouge2": 64.1873, "eval_rougeL": 68.2379, "eval_rougeLsum": 70.5264, "eval_runtime": 192.0358, "eval_samples_per_second": 3.906, "eval_steps_per_second": 0.125, "step": 4460 }, { "epoch": 5.0, "step": 4460, "total_flos": 4.886428467068928e+16, "train_loss": 0.5789496143837146, "train_runtime": 5011.6095, "train_samples_per_second": 14.239, "train_steps_per_second": 0.89 } ], "logging_steps": 500, "max_steps": 4460, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 4.886428467068928e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }