{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 0.00012, "loss": 12.7445, "step": 9 }, { "epoch": 0.36, "learning_rate": 0.00019555555555555556, "loss": 11.2951, "step": 18 }, { "epoch": 0.54, "learning_rate": 0.00018222222222222224, "loss": 9.9077, "step": 27 }, { "epoch": 0.72, "learning_rate": 0.00016888888888888889, "loss": 8.6537, "step": 36 }, { "epoch": 0.9, "learning_rate": 0.00015555555555555556, "loss": 7.7628, "step": 45 }, { "epoch": 1.08, "learning_rate": 0.00014222222222222224, "loss": 7.0578, "step": 54 }, { "epoch": 1.26, "learning_rate": 0.00012888888888888892, "loss": 6.7923, "step": 63 }, { "epoch": 1.44, "learning_rate": 0.00011555555555555555, "loss": 6.4867, "step": 72 }, { "epoch": 1.62, "learning_rate": 0.00010222222222222222, "loss": 6.4616, "step": 81 }, { "epoch": 1.8, "learning_rate": 8.888888888888889e-05, "loss": 6.278, "step": 90 }, { "epoch": 1.98, "learning_rate": 7.555555555555556e-05, "loss": 6.2509, "step": 99 }, { "epoch": 2.16, "learning_rate": 6.222222222222222e-05, "loss": 6.1527, "step": 108 }, { "epoch": 2.34, "learning_rate": 4.888888888888889e-05, "loss": 6.1656, "step": 117 }, { "epoch": 2.52, "learning_rate": 3.555555555555556e-05, "loss": 6.0661, "step": 126 }, { "epoch": 2.7, "learning_rate": 2.2222222222222223e-05, "loss": 6.0176, "step": 135 }, { "epoch": 2.88, "learning_rate": 8.88888888888889e-06, "loss": 6.0408, "step": 144 } ], "logging_steps": 9, "max_steps": 150, "num_train_epochs": 3, "save_steps": 500, "total_flos": 281424942858240.0, "trial_name": null, "trial_params": null }