|
{ |
|
"best_metric": 71.4437, |
|
"best_model_checkpoint": "bin/indosum-base-3/checkpoint-4460", |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 4460, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.0639431476593018, |
|
"learning_rate": 0.0008, |
|
"loss": 1.1937, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 90.984, |
|
"eval_loss": 0.8369650840759277, |
|
"eval_rouge1": 64.7751, |
|
"eval_rouge2": 56.7545, |
|
"eval_rougeL": 61.4956, |
|
"eval_rougeLsum": 63.81, |
|
"eval_runtime": 190.626, |
|
"eval_samples_per_second": 3.934, |
|
"eval_steps_per_second": 0.126, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 0.4769066572189331, |
|
"learning_rate": 0.0006, |
|
"loss": 0.6828, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 101.11733333333333, |
|
"eval_loss": 0.6911363005638123, |
|
"eval_rouge1": 69.9628, |
|
"eval_rouge2": 62.6338, |
|
"eval_rougeL": 66.8253, |
|
"eval_rougeLsum": 69.0763, |
|
"eval_runtime": 191.8081, |
|
"eval_samples_per_second": 3.91, |
|
"eval_steps_per_second": 0.125, |
|
"step": 1784 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.5286809802055359, |
|
"learning_rate": 0.0004, |
|
"loss": 0.4847, |
|
"step": 2676 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 95.61333333333333, |
|
"eval_loss": 0.6692180037498474, |
|
"eval_rouge1": 69.9807, |
|
"eval_rouge2": 62.5614, |
|
"eval_rougeL": 66.7619, |
|
"eval_rougeLsum": 69.0683, |
|
"eval_runtime": 188.7212, |
|
"eval_samples_per_second": 3.974, |
|
"eval_steps_per_second": 0.127, |
|
"step": 2676 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.3024337887763977, |
|
"learning_rate": 0.0002, |
|
"loss": 0.3348, |
|
"step": 3568 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 100.15466666666667, |
|
"eval_loss": 0.7028738856315613, |
|
"eval_rouge1": 70.9247, |
|
"eval_rouge2": 63.6191, |
|
"eval_rougeL": 67.7749, |
|
"eval_rougeLsum": 70.0079, |
|
"eval_runtime": 191.1263, |
|
"eval_samples_per_second": 3.924, |
|
"eval_steps_per_second": 0.126, |
|
"step": 3568 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.5054429173469543, |
|
"learning_rate": 0.0, |
|
"loss": 0.1988, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 98.66666666666667, |
|
"eval_loss": 0.7689540982246399, |
|
"eval_rouge1": 71.4437, |
|
"eval_rouge2": 64.1873, |
|
"eval_rougeL": 68.2379, |
|
"eval_rougeLsum": 70.5264, |
|
"eval_runtime": 192.0358, |
|
"eval_samples_per_second": 3.906, |
|
"eval_steps_per_second": 0.125, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 4460, |
|
"total_flos": 4.886428467068928e+16, |
|
"train_loss": 0.5789496143837146, |
|
"train_runtime": 5011.6095, |
|
"train_samples_per_second": 14.239, |
|
"train_steps_per_second": 0.89 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4460, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 4.886428467068928e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|