|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"global_step": 3200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.060808744840238495, |
|
"eval_loss": 2.7135698795318604, |
|
"eval_runtime": 315.8806, |
|
"eval_samples_per_second": 3.603, |
|
"eval_steps_per_second": 0.114, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.23820890608280518, |
|
"eval_loss": 1.7017812728881836, |
|
"eval_runtime": 199.1113, |
|
"eval_samples_per_second": 5.715, |
|
"eval_steps_per_second": 0.181, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.2333157930860921, |
|
"eval_loss": 1.7553855180740356, |
|
"eval_runtime": 183.4244, |
|
"eval_samples_per_second": 6.204, |
|
"eval_steps_per_second": 0.196, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.69125e-05, |
|
"loss": 2.1899, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.24403815580286167, |
|
"eval_loss": 1.729722499847412, |
|
"eval_runtime": 199.443, |
|
"eval_samples_per_second": 5.706, |
|
"eval_steps_per_second": 0.181, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.24248370965297772, |
|
"eval_loss": 1.7367736101150513, |
|
"eval_runtime": 195.0886, |
|
"eval_samples_per_second": 5.833, |
|
"eval_steps_per_second": 0.185, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.24708175633139415, |
|
"eval_loss": 1.7663674354553223, |
|
"eval_runtime": 190.8193, |
|
"eval_samples_per_second": 5.964, |
|
"eval_steps_per_second": 0.189, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 1.37875e-05, |
|
"loss": 1.1629, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_bleu": 0.24734088993827677, |
|
"eval_loss": 1.8130639791488647, |
|
"eval_runtime": 185.235, |
|
"eval_samples_per_second": 6.144, |
|
"eval_steps_per_second": 0.194, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_bleu": 0.25035574867022287, |
|
"eval_loss": 1.860228419303894, |
|
"eval_runtime": 191.0903, |
|
"eval_samples_per_second": 5.955, |
|
"eval_steps_per_second": 0.188, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_bleu": 0.2550337582147487, |
|
"eval_loss": 1.9042994976043701, |
|
"eval_runtime": 192.163, |
|
"eval_samples_per_second": 5.922, |
|
"eval_steps_per_second": 0.187, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 1.06625e-05, |
|
"loss": 0.807, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_bleu": 0.255168022317063, |
|
"eval_loss": 1.9512995481491089, |
|
"eval_runtime": 193.6447, |
|
"eval_samples_per_second": 5.877, |
|
"eval_steps_per_second": 0.186, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_bleu": 0.2583105775089707, |
|
"eval_loss": 2.0014865398406982, |
|
"eval_runtime": 194.2032, |
|
"eval_samples_per_second": 5.86, |
|
"eval_steps_per_second": 0.185, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_bleu": 0.2611697071205468, |
|
"eval_loss": 2.036052942276001, |
|
"eval_runtime": 190.4985, |
|
"eval_samples_per_second": 5.974, |
|
"eval_steps_per_second": 0.189, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 7.537500000000001e-06, |
|
"loss": 0.5977, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_bleu": 0.2624028465673082, |
|
"eval_loss": 2.0794923305511475, |
|
"eval_runtime": 192.5774, |
|
"eval_samples_per_second": 5.909, |
|
"eval_steps_per_second": 0.187, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_bleu": 0.26034097889106955, |
|
"eval_loss": 2.1036157608032227, |
|
"eval_runtime": 198.097, |
|
"eval_samples_per_second": 5.745, |
|
"eval_steps_per_second": 0.182, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_bleu": 0.264538215714405, |
|
"eval_loss": 2.1185383796691895, |
|
"eval_runtime": 189.6413, |
|
"eval_samples_per_second": 6.001, |
|
"eval_steps_per_second": 0.19, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"learning_rate": 4.4125000000000005e-06, |
|
"loss": 0.4697, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_bleu": 0.2666872542669057, |
|
"eval_loss": 2.1361563205718994, |
|
"eval_runtime": 189.4756, |
|
"eval_samples_per_second": 6.006, |
|
"eval_steps_per_second": 0.19, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_bleu": 0.2652516887552325, |
|
"eval_loss": 2.162111520767212, |
|
"eval_runtime": 193.0939, |
|
"eval_samples_per_second": 5.894, |
|
"eval_steps_per_second": 0.186, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_bleu": 0.2673360550776601, |
|
"eval_loss": 2.163081169128418, |
|
"eval_runtime": 190.9327, |
|
"eval_samples_per_second": 5.96, |
|
"eval_steps_per_second": 0.189, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"learning_rate": 1.2875000000000002e-06, |
|
"loss": 0.4032, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_bleu": 0.2660614156233256, |
|
"eval_loss": 2.1683239936828613, |
|
"eval_runtime": 190.2616, |
|
"eval_samples_per_second": 5.981, |
|
"eval_steps_per_second": 0.189, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_bleu": 0.2674984140410235, |
|
"eval_loss": 2.173663377761841, |
|
"eval_runtime": 189.5283, |
|
"eval_samples_per_second": 6.004, |
|
"eval_steps_per_second": 0.19, |
|
"step": 3200 |
|
} |
|
], |
|
"max_steps": 3200, |
|
"num_train_epochs": 20, |
|
"total_flos": 5.530638338624717e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|