{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 14016,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.21,
      "learning_rate": 4.821632420091324e-05,
      "loss": 0.3322,
      "step": 500
    },
    {
      "epoch": 0.43,
      "learning_rate": 4.6432648401826485e-05,
      "loss": 0.2664,
      "step": 1000
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.464897260273973e-05,
      "loss": 0.2412,
      "step": 1500
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.286529680365297e-05,
      "loss": 0.2219,
      "step": 2000
    },
    {
      "epoch": 1.0,
      "eval_bleu": 21.2068,
      "eval_gen_len": 19.2422,
      "eval_loss": 0.18159246444702148,
      "eval_runtime": 154.1038,
      "eval_samples_per_second": 53.886,
      "eval_steps_per_second": 1.687,
      "step": 2336
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.108162100456621e-05,
      "loss": 0.1887,
      "step": 2500
    },
    {
      "epoch": 1.28,
      "learning_rate": 3.929794520547945e-05,
      "loss": 0.1575,
      "step": 3000
    },
    {
      "epoch": 1.5,
      "learning_rate": 3.7514269406392696e-05,
      "loss": 0.1534,
      "step": 3500
    },
    {
      "epoch": 1.71,
      "learning_rate": 3.573059360730594e-05,
      "loss": 0.1553,
      "step": 4000
    },
    {
      "epoch": 1.93,
      "learning_rate": 3.394691780821918e-05,
      "loss": 0.1479,
      "step": 4500
    },
    {
      "epoch": 2.0,
      "eval_bleu": 21.6783,
      "eval_gen_len": 19.2471,
      "eval_loss": 0.16088075935840607,
      "eval_runtime": 154.8749,
      "eval_samples_per_second": 53.617,
      "eval_steps_per_second": 1.679,
      "step": 4672
    },
    {
      "epoch": 2.14,
      "learning_rate": 3.2163242009132423e-05,
      "loss": 0.1249,
      "step": 5000
    },
    {
      "epoch": 2.35,
      "learning_rate": 3.037956621004566e-05,
      "loss": 0.1117,
      "step": 5500
    },
    {
      "epoch": 2.57,
      "learning_rate": 2.8595890410958903e-05,
      "loss": 0.1119,
      "step": 6000
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.681221461187215e-05,
      "loss": 0.1137,
      "step": 6500
    },
    {
      "epoch": 3.0,
      "learning_rate": 2.502853881278539e-05,
      "loss": 0.1092,
      "step": 7000
    },
    {
      "epoch": 3.0,
      "eval_bleu": 21.8526,
      "eval_gen_len": 19.2463,
      "eval_loss": 0.15343397855758667,
      "eval_runtime": 154.8601,
      "eval_samples_per_second": 53.623,
      "eval_steps_per_second": 1.679,
      "step": 7008
    },
    {
      "epoch": 3.21,
      "learning_rate": 2.324486301369863e-05,
      "loss": 0.0842,
      "step": 7500
    },
    {
      "epoch": 3.42,
      "learning_rate": 2.1461187214611872e-05,
      "loss": 0.085,
      "step": 8000
    },
    {
      "epoch": 3.64,
      "learning_rate": 1.9677511415525117e-05,
      "loss": 0.0853,
      "step": 8500
    },
    {
      "epoch": 3.85,
      "learning_rate": 1.7893835616438355e-05,
      "loss": 0.0856,
      "step": 9000
    },
    {
      "epoch": 4.0,
      "eval_bleu": 22.0841,
      "eval_gen_len": 19.2482,
      "eval_loss": 0.15252342820167542,
      "eval_runtime": 155.0415,
      "eval_samples_per_second": 53.56,
      "eval_steps_per_second": 1.677,
      "step": 9344
    },
    {
      "epoch": 4.07,
      "learning_rate": 1.61101598173516e-05,
      "loss": 0.0792,
      "step": 9500
    },
    {
      "epoch": 4.28,
      "learning_rate": 1.4326484018264841e-05,
      "loss": 0.0664,
      "step": 10000
    },
    {
      "epoch": 4.49,
      "learning_rate": 1.2542808219178081e-05,
      "loss": 0.0673,
      "step": 10500
    },
    {
      "epoch": 4.71,
      "learning_rate": 1.0759132420091326e-05,
      "loss": 0.0663,
      "step": 11000
    },
    {
      "epoch": 4.92,
      "learning_rate": 8.975456621004565e-06,
      "loss": 0.0667,
      "step": 11500
    },
    {
      "epoch": 5.0,
      "eval_bleu": 22.1943,
      "eval_gen_len": 19.2467,
      "eval_loss": 0.15876752138137817,
      "eval_runtime": 155.4094,
      "eval_samples_per_second": 53.433,
      "eval_steps_per_second": 1.673,
      "step": 11680
    },
    {
      "epoch": 5.14,
      "learning_rate": 7.191780821917809e-06,
      "loss": 0.0579,
      "step": 12000
    },
    {
      "epoch": 5.35,
      "learning_rate": 5.40810502283105e-06,
      "loss": 0.0545,
      "step": 12500
    },
    {
      "epoch": 5.57,
      "learning_rate": 3.6244292237442927e-06,
      "loss": 0.0543,
      "step": 13000
    },
    {
      "epoch": 5.78,
      "learning_rate": 1.8407534246575344e-06,
      "loss": 0.0534,
      "step": 13500
    },
    {
      "epoch": 5.99,
      "learning_rate": 5.7077625570776255e-08,
      "loss": 0.0549,
      "step": 14000
    },
    {
      "epoch": 6.0,
      "eval_bleu": 22.2237,
      "eval_gen_len": 19.2467,
      "eval_loss": 0.16120968759059906,
      "eval_runtime": 154.8857,
      "eval_samples_per_second": 53.614,
      "eval_steps_per_second": 1.679,
      "step": 14016
    },
    {
      "epoch": 6.0,
      "step": 14016,
      "total_flos": 3.006237250179072e+16,
      "train_loss": 0.12124856073222204,
      "train_runtime": 4033.4181,
      "train_samples_per_second": 111.169,
      "train_steps_per_second": 3.475
    }
  ],
  "logging_steps": 500,
  "max_steps": 14016,
  "num_train_epochs": 6,
  "save_steps": 500,
  "total_flos": 3.006237250179072e+16,
  "trial_name": null,
  "trial_params": null
}