|
{ |
|
"best_metric": 1.1539206504821777, |
|
"best_model_checkpoint": "./jako_mbartLarge_6p_run1/checkpoint-4000", |
|
"epoch": 3.8396928245740343, |
|
"eval_steps": 1000, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.951290793960059e-05, |
|
"loss": 1.8861, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.829517778860205e-05, |
|
"loss": 1.4641, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_bleu": 21.6162, |
|
"eval_gen_len": 19.4434, |
|
"eval_loss": 1.3276299238204956, |
|
"eval_runtime": 299.0357, |
|
"eval_samples_per_second": 13.931, |
|
"eval_steps_per_second": 0.873, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.707744763760351e-05, |
|
"loss": 1.3282, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.585971748660497e-05, |
|
"loss": 1.2615, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_bleu": 24.346, |
|
"eval_gen_len": 19.4734, |
|
"eval_loss": 1.186624526977539, |
|
"eval_runtime": 297.0522, |
|
"eval_samples_per_second": 14.024, |
|
"eval_steps_per_second": 0.879, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.4641987335606436e-05, |
|
"loss": 1.0805, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 4.342425718460789e-05, |
|
"loss": 0.9103, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_bleu": 25.4249, |
|
"eval_gen_len": 19.0086, |
|
"eval_loss": 1.1637648344039917, |
|
"eval_runtime": 293.4921, |
|
"eval_samples_per_second": 14.195, |
|
"eval_steps_per_second": 0.889, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.2206527033609356e-05, |
|
"loss": 0.8534, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 4.0988796882610817e-05, |
|
"loss": 0.8285, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_bleu": 26.2658, |
|
"eval_gen_len": 19.3961, |
|
"eval_loss": 1.1539206504821777, |
|
"eval_runtime": 298.2089, |
|
"eval_samples_per_second": 13.97, |
|
"eval_steps_per_second": 0.875, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.977106673161228e-05, |
|
"loss": 0.7521, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.855333658061374e-05, |
|
"loss": 0.5977, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_bleu": 25.5651, |
|
"eval_gen_len": 19.6248, |
|
"eval_loss": 1.1977771520614624, |
|
"eval_runtime": 299.5483, |
|
"eval_samples_per_second": 13.908, |
|
"eval_steps_per_second": 0.871, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 3.73356064296152e-05, |
|
"loss": 0.5686, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 3.611787627861666e-05, |
|
"loss": 0.5423, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_bleu": 26.8441, |
|
"eval_gen_len": 19.1349, |
|
"eval_loss": 1.1830259561538696, |
|
"eval_runtime": 285.8007, |
|
"eval_samples_per_second": 14.577, |
|
"eval_steps_per_second": 0.913, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.4900146127618125e-05, |
|
"loss": 0.5099, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.368241597661958e-05, |
|
"loss": 0.3816, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"eval_bleu": 26.1301, |
|
"eval_gen_len": 19.1207, |
|
"eval_loss": 1.266960620880127, |
|
"eval_runtime": 292.1624, |
|
"eval_samples_per_second": 14.259, |
|
"eval_steps_per_second": 0.893, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 3.2464685825621045e-05, |
|
"loss": 0.3637, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 3.1246955674622506e-05, |
|
"loss": 0.3412, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_bleu": 26.7783, |
|
"eval_gen_len": 19.2417, |
|
"eval_loss": 1.2869776487350464, |
|
"eval_runtime": 291.2259, |
|
"eval_samples_per_second": 14.305, |
|
"eval_steps_per_second": 0.896, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"step": 8000, |
|
"total_flos": 2.7749663225623347e+17, |
|
"train_loss": 0.8543571968078614, |
|
"train_runtime": 11541.5643, |
|
"train_samples_per_second": 28.881, |
|
"train_steps_per_second": 1.805 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 20830, |
|
"num_train_epochs": 10, |
|
"save_steps": 1000, |
|
"total_flos": 2.7749663225623347e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|