{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.527415143603133,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_gen_len": 81.7789,
      "eval_loss": 0.7409296035766602,
      "eval_rouge-1": 52.5771,
      "eval_rouge-2": 35.6684,
      "eval_rouge-l": 49.1637,
      "eval_runtime": 402.5304,
      "eval_samples_per_second": 0.236,
      "eval_steps_per_second": 0.119,
      "step": 383
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.6307348004475943e-05,
      "loss": 0.9884,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 82.1263,
      "eval_loss": 0.7081394195556641,
      "eval_rouge-1": 53.3979,
      "eval_rouge-2": 36.4405,
      "eval_rouge-l": 49.9086,
      "eval_runtime": 389.2507,
      "eval_samples_per_second": 0.244,
      "eval_steps_per_second": 0.123,
      "step": 766
    },
    {
      "epoch": 2.61,
      "learning_rate": 1.2584856396866842e-05,
      "loss": 0.7265,
      "step": 1000
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 83.6211,
      "eval_loss": 0.6961584091186523,
      "eval_rouge-1": 53.1336,
      "eval_rouge-2": 36.0563,
      "eval_rouge-l": 49.7218,
      "eval_runtime": 395.7734,
      "eval_samples_per_second": 0.24,
      "eval_steps_per_second": 0.121,
      "step": 1149
    },
    {
      "epoch": 3.92,
      "learning_rate": 8.86236478925774e-06,
      "loss": 0.6564,
      "step": 1500
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 83.7895,
      "eval_loss": 0.693389892578125,
      "eval_rouge-1": 53.6419,
      "eval_rouge-2": 36.5755,
      "eval_rouge-l": 50.0063,
      "eval_runtime": 394.1844,
      "eval_samples_per_second": 0.241,
      "eval_steps_per_second": 0.122,
      "step": 1532
    },
    {
      "epoch": 5.0,
      "eval_gen_len": 82.1474,
      "eval_loss": 0.6953384280204773,
      "eval_rouge-1": 54.1103,
      "eval_rouge-2": 37.0725,
      "eval_rouge-l": 50.7383,
      "eval_runtime": 388.3706,
      "eval_samples_per_second": 0.245,
      "eval_steps_per_second": 0.124,
      "step": 1915
    },
    {
      "epoch": 5.22,
      "learning_rate": 5.132413278627378e-06,
      "loss": 0.6026,
      "step": 2000
    },
    {
      "epoch": 6.0,
      "eval_gen_len": 82.1053,
      "eval_loss": 0.6941447257995605,
      "eval_rouge-1": 53.5465,
      "eval_rouge-2": 36.571,
      "eval_rouge-l": 50.1244,
      "eval_runtime": 382.416,
      "eval_samples_per_second": 0.248,
      "eval_steps_per_second": 0.126,
      "step": 2298
    },
    {
      "epoch": 6.53,
      "learning_rate": 1.4024617679970163e-06,
      "loss": 0.5873,
      "step": 2500
    }
  ],
  "max_steps": 2681,
  "num_train_epochs": 7,
  "total_flos": 6497005883228160.0,
  "trial_name": null,
  "trial_params": null
}
|
|