|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 2172, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 11.021739130434783, |
|
"eval_loss": 5.008267879486084, |
|
"eval_rouge1": 29.3556, |
|
"eval_rouge2": 23.1164, |
|
"eval_rougeL": 26.8076, |
|
"eval_rougeLsum": 26.8263, |
|
"eval_runtime": 173.025, |
|
"eval_samples_per_second": 1.861, |
|
"eval_steps_per_second": 0.237, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3e-05, |
|
"loss": 19.6995, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 10.335403726708075, |
|
"eval_loss": 0.8298134207725525, |
|
"eval_rouge1": 32.0346, |
|
"eval_rouge2": 24.9074, |
|
"eval_rougeL": 28.9355, |
|
"eval_rougeLsum": 29.0842, |
|
"eval_runtime": 150.6955, |
|
"eval_samples_per_second": 2.137, |
|
"eval_steps_per_second": 0.272, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.5192307692307694e-05, |
|
"loss": 0.9076, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_gen_len": 10.863354037267081, |
|
"eval_loss": 0.7763211131095886, |
|
"eval_rouge1": 31.1555, |
|
"eval_rouge2": 23.6164, |
|
"eval_rougeL": 27.9825, |
|
"eval_rougeLsum": 28.1607, |
|
"eval_runtime": 162.2266, |
|
"eval_samples_per_second": 1.985, |
|
"eval_steps_per_second": 0.253, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_gen_len": 12.23913043478261, |
|
"eval_loss": 0.7397241592407227, |
|
"eval_rouge1": 28.3909, |
|
"eval_rouge2": 20.1283, |
|
"eval_rougeL": 24.9491, |
|
"eval_rougeLsum": 25.0633, |
|
"eval_runtime": 160.7559, |
|
"eval_samples_per_second": 2.003, |
|
"eval_steps_per_second": 0.255, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 2.0384615384615387e-05, |
|
"loss": 0.8095, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_gen_len": 11.658385093167702, |
|
"eval_loss": 0.7186636328697205, |
|
"eval_rouge1": 30.3081, |
|
"eval_rouge2": 22.0426, |
|
"eval_rougeL": 26.8549, |
|
"eval_rougeLsum": 27.0402, |
|
"eval_runtime": 161.9975, |
|
"eval_samples_per_second": 1.988, |
|
"eval_steps_per_second": 0.253, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 1.557692307692308e-05, |
|
"loss": 0.7683, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_gen_len": 11.70807453416149, |
|
"eval_loss": 0.7056237459182739, |
|
"eval_rouge1": 30.3901, |
|
"eval_rouge2": 21.9556, |
|
"eval_rougeL": 26.8053, |
|
"eval_rougeLsum": 27.0162, |
|
"eval_runtime": 161.96, |
|
"eval_samples_per_second": 1.988, |
|
"eval_steps_per_second": 0.253, |
|
"step": 2172 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3620, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.164912412852224e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|