{ "best_metric": 72.3075, "best_model_checkpoint": "bin/indosum-lora-3/checkpoint-2676", "epoch": 5.0, "eval_steps": 500, "global_step": 4460, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.5876893401145935, "learning_rate": 0.0008, "loss": 0.8274, "step": 892 }, { "epoch": 1.0, "eval_gen_len": 100.152, "eval_loss": 0.5684335827827454, "eval_rouge1": 69.5991, "eval_rouge2": 62.0811, "eval_rougeL": 66.2103, "eval_rougeLsum": 68.5179, "eval_runtime": 219.0061, "eval_samples_per_second": 3.425, "eval_steps_per_second": 0.11, "step": 892 }, { "epoch": 2.0, "grad_norm": 0.4317479431629181, "learning_rate": 0.0006, "loss": 0.6237, "step": 1784 }, { "epoch": 2.0, "eval_gen_len": 104.78133333333334, "eval_loss": 0.5486577153205872, "eval_rouge1": 71.3379, "eval_rouge2": 64.0461, "eval_rougeL": 68.1399, "eval_rougeLsum": 70.3424, "eval_runtime": 219.5178, "eval_samples_per_second": 3.417, "eval_steps_per_second": 0.109, "step": 1784 }, { "epoch": 3.0, "grad_norm": 0.5349451303482056, "learning_rate": 0.0004, "loss": 0.5824, "step": 2676 }, { "epoch": 3.0, "eval_gen_len": 101.356, "eval_loss": 0.5441932678222656, "eval_rouge1": 72.3075, "eval_rouge2": 65.3703, "eval_rougeL": 69.1638, "eval_rougeLsum": 71.3776, "eval_runtime": 219.0561, "eval_samples_per_second": 3.424, "eval_steps_per_second": 0.11, "step": 2676 }, { "epoch": 4.0, "grad_norm": 0.44907253980636597, "learning_rate": 0.0002, "loss": 0.555, "step": 3568 }, { "epoch": 4.0, "eval_gen_len": 103.64666666666666, "eval_loss": 0.5253363251686096, "eval_rouge1": 72.2139, "eval_rouge2": 65.2916, "eval_rougeL": 69.2278, "eval_rougeLsum": 71.2919, "eval_runtime": 219.9659, "eval_samples_per_second": 3.41, "eval_steps_per_second": 0.109, "step": 3568 }, { "epoch": 5.0, "grad_norm": 0.3915654420852661, "learning_rate": 0.0, "loss": 0.5391, "step": 4460 }, { "epoch": 5.0, "eval_gen_len": 102.11733333333333, "eval_loss": 0.5204364657402039, "eval_rouge1": 72.0194, "eval_rouge2": 64.9755, "eval_rougeL": 68.9244, "eval_rougeLsum": 71.0549, "eval_runtime": 218.95, "eval_samples_per_second": 3.425, "eval_steps_per_second": 0.11, "step": 4460 }, { "epoch": 5.0, "step": 4460, "total_flos": 4.90582346563584e+16, "train_loss": 0.6255005293362879, "train_runtime": 4838.9085, "train_samples_per_second": 14.747, "train_steps_per_second": 0.922 } ], "logging_steps": 500, "max_steps": 4460, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 4.90582346563584e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }