{ "best_metric": 1.8867709636688232, "best_model_checkpoint": "output/kurt-cobain/checkpoint-126", "epoch": 2.0, "global_step": 126, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 0.00013507870183531476, "loss": 3.0032, "step": 5 }, { "epoch": 0.16, "learning_rate": 0.00012884599993319768, "loss": 2.5844, "step": 10 }, { "epoch": 0.24, "learning_rate": 0.00011888735840752609, "loss": 2.5435, "step": 15 }, { "epoch": 0.32, "learning_rate": 0.0001058186737011911, "loss": 2.4834, "step": 20 }, { "epoch": 0.4, "learning_rate": 9.044818420726556e-05, "loss": 2.4343, "step": 25 }, { "epoch": 0.48, "learning_rate": 7.372648442002871e-05, "loss": 2.7558, "step": 30 }, { "epoch": 0.56, "learning_rate": 5.668773501204858e-05, "loss": 2.5089, "step": 35 }, { "epoch": 0.63, "learning_rate": 4.0385704725240065e-05, "loss": 2.4147, "step": 40 }, { "epoch": 0.71, "learning_rate": 2.5828599592490882e-05, "loss": 2.123, "step": 45 }, { "epoch": 0.79, "learning_rate": 1.3916710004507539e-05, "loss": 2.2562, "step": 50 }, { "epoch": 0.87, "learning_rate": 5.38673186569003e-06, "loss": 2.4784, "step": 55 }, { "epoch": 0.95, "learning_rate": 7.662053209561833e-07, "loss": 2.5457, "step": 60 }, { "epoch": 1.0, "eval_loss": 2.059675931930542, "eval_runtime": 3.6409, "eval_samples_per_second": 22.522, "eval_steps_per_second": 3.021, "step": 63 }, { "epoch": 1.03, "learning_rate": 3.408888099334633e-07, "loss": 2.2316, "step": 65 }, { "epoch": 1.11, "learning_rate": 4.137086214086682e-06, "loss": 2.3402, "step": 70 }, { "epoch": 1.19, "learning_rate": 1.1920020081922749e-05, "loss": 2.2531, "step": 75 }, { "epoch": 1.27, "learning_rate": 2.320835154085542e-05, "loss": 2.3415, "step": 80 }, { "epoch": 1.35, "learning_rate": 3.7303948905573005e-05, "loss": 2.1674, "step": 85 }, { "epoch": 1.43, "learning_rate": 5.333506393059682e-05, "loss": 2.2066, "step": 90 }, { "epoch": 1.51, "learning_rate": 7.031024545323179e-05, "loss": 1.9122, "step": 95 }, { "epoch": 1.59, "learning_rate": 8.71796561146101e-05, "loss": 2.1474, "step": 100 }, { "epoch": 1.67, "learning_rate": 0.00010290000000000001, "loss": 2.0603, "step": 105 }, { "epoch": 1.75, "learning_rate": 0.0001164990457207046, "loss": 1.8847, "step": 110 }, { "epoch": 1.83, "learning_rate": 0.00012713575447996587, "loss": 1.881, "step": 115 }, { "epoch": 1.9, "learning_rate": 0.00013415229447692924, "loss": 2.2478, "step": 120 }, { "epoch": 1.98, "learning_rate": 0.00013711472479561806, "loss": 2.2805, "step": 125 }, { "epoch": 2.0, "eval_loss": 1.8867709636688232, "eval_runtime": 3.3707, "eval_samples_per_second": 22.547, "eval_steps_per_second": 2.967, "step": 126 } ], "max_steps": 126, "num_train_epochs": 2, "total_flos": 130646016000000.0, "trial_name": null, "trial_params": null }