{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 2850, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 0.03086976985092356, "eval_loss": 4.82624626159668, "eval_runtime": 97.3301, "eval_samples_per_second": 11.682, "eval_steps_per_second": 0.74, "step": 285 }, { "epoch": 1.7543859649122808, "grad_norm": 0.6388371586799622, "learning_rate": 1.9840000000000003e-05, "loss": 5.7131, "step": 500 }, { "epoch": 2.0, "eval_bleu": 1.2576014507933267e-08, "eval_loss": 1.6568931341171265, "eval_runtime": 344.4207, "eval_samples_per_second": 3.301, "eval_steps_per_second": 0.209, "step": 570 }, { "epoch": 3.0, "eval_bleu": 0.0, "eval_loss": 1.6353662014007568, "eval_runtime": 344.9175, "eval_samples_per_second": 3.296, "eval_steps_per_second": 0.209, "step": 855 }, { "epoch": 3.5087719298245617, "grad_norm": 0.41950201988220215, "learning_rate": 1.5778723404255322e-05, "loss": 1.6327, "step": 1000 }, { "epoch": 4.0, "eval_bleu": 0.028380647856164357, "eval_loss": 1.6304912567138672, "eval_runtime": 341.8623, "eval_samples_per_second": 3.326, "eval_steps_per_second": 0.211, "step": 1140 }, { "epoch": 5.0, "eval_bleu": 2.3806283652785307, "eval_loss": 1.6316522359848022, "eval_runtime": 316.8037, "eval_samples_per_second": 3.589, "eval_steps_per_second": 0.227, "step": 1425 }, { "epoch": 5.2631578947368425, "grad_norm": 0.46960917115211487, "learning_rate": 1.152340425531915e-05, "loss": 1.5867, "step": 1500 }, { "epoch": 6.0, "eval_bleu": 5.280724757906034, "eval_loss": 1.632155179977417, "eval_runtime": 173.2701, "eval_samples_per_second": 6.562, "eval_steps_per_second": 0.416, "step": 1710 }, { "epoch": 7.0, "eval_bleu": 5.34771509760483, "eval_loss": 1.6327825784683228, "eval_runtime": 237.2864, "eval_samples_per_second": 4.792, "eval_steps_per_second": 0.303, "step": 1995 }, { "epoch": 7.017543859649122, "grad_norm": 0.33166804909706116, "learning_rate": 7.268085106382979e-06, "loss": 1.5654, "step": 2000 }, { "epoch": 8.0, "eval_bleu": 4.833246227017286, "eval_loss": 1.634158730506897, "eval_runtime": 173.1237, "eval_samples_per_second": 6.568, "eval_steps_per_second": 0.416, "step": 2280 }, { "epoch": 8.771929824561404, "grad_norm": 0.2613541781902313, "learning_rate": 3.012765957446809e-06, "loss": 1.5528, "step": 2500 }, { "epoch": 9.0, "eval_bleu": 5.348525572887838, "eval_loss": 1.6349856853485107, "eval_runtime": 140.3041, "eval_samples_per_second": 8.104, "eval_steps_per_second": 0.513, "step": 2565 }, { "epoch": 10.0, "eval_bleu": 5.1657380844985274, "eval_loss": 1.6354198455810547, "eval_runtime": 113.6949, "eval_samples_per_second": 10.0, "eval_steps_per_second": 0.633, "step": 2850 } ], "logging_steps": 500, "max_steps": 2850, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.463997932601344e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }