{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0017792011386887287, "eval_steps": 5, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 5.930670462295763e-05, "eval_loss": 12.147173881530762, "eval_runtime": 290.7949, "eval_samples_per_second": 24.416, "eval_steps_per_second": 12.208, "step": 1 }, { "epoch": 0.00017792011386887289, "grad_norm": 11.14660930633545, "learning_rate": 6e-05, "loss": 12.1674, "step": 3 }, { "epoch": 0.0002965335231147881, "eval_loss": 11.853402137756348, "eval_runtime": 294.1314, "eval_samples_per_second": 24.139, "eval_steps_per_second": 12.069, "step": 5 }, { "epoch": 0.00035584022773774577, "grad_norm": 10.97592830657959, "learning_rate": 0.00012, "loss": 11.9565, "step": 6 }, { "epoch": 0.0005337603416066186, "grad_norm": 8.313521385192871, "learning_rate": 0.00018, "loss": 11.4876, "step": 9 }, { "epoch": 0.0005930670462295762, "eval_loss": 10.954883575439453, "eval_runtime": 291.2838, "eval_samples_per_second": 24.375, "eval_steps_per_second": 12.187, "step": 10 }, { "epoch": 0.0007116804554754915, "grad_norm": 7.10131311416626, "learning_rate": 0.00019510565162951537, "loss": 10.9684, "step": 12 }, { "epoch": 0.0008896005693443643, "grad_norm": 5.987206935882568, "learning_rate": 0.00017071067811865476, "loss": 10.4647, "step": 15 }, { "epoch": 0.0008896005693443643, "eval_loss": 10.177637100219727, "eval_runtime": 294.0051, "eval_samples_per_second": 24.149, "eval_steps_per_second": 12.075, "step": 15 }, { "epoch": 0.0010675206832132373, "grad_norm": 5.192551612854004, "learning_rate": 0.00013090169943749476, "loss": 10.0547, "step": 18 }, { "epoch": 0.0011861340924591525, "eval_loss": 9.714789390563965, "eval_runtime": 292.8811, "eval_samples_per_second": 24.242, "eval_steps_per_second": 12.121, "step": 20 }, { "epoch": 0.0012454407970821102, "grad_norm": 4.78351354598999, "learning_rate": 8.435655349597689e-05, "loss": 9.7974, "step": 21 }, { "epoch": 0.001423360910950983, "grad_norm": 4.210782527923584, "learning_rate": 4.12214747707527e-05, "loss": 9.623, "step": 24 }, { "epoch": 0.0014826676155739406, "eval_loss": 9.522984504699707, "eval_runtime": 293.8342, "eval_samples_per_second": 24.163, "eval_steps_per_second": 12.082, "step": 25 }, { "epoch": 0.0016012810248198558, "grad_norm": 3.7491424083709717, "learning_rate": 1.0899347581163221e-05, "loss": 9.5269, "step": 27 }, { "epoch": 0.0017792011386887287, "grad_norm": 3.693744421005249, "learning_rate": 0.0, "loss": 9.4953, "step": 30 }, { "epoch": 0.0017792011386887287, "eval_loss": 9.486776351928711, "eval_runtime": 293.7624, "eval_samples_per_second": 24.169, "eval_steps_per_second": 12.085, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7771550121984.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }