{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "global_step": 29505, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "learning_rate": 0.002976, "loss": 4.0956, "step": 500 }, { "epoch": 0.51, "learning_rate": 0.0029488019307016035, "loss": 3.522, "step": 1000 }, { "epoch": 0.76, "learning_rate": 0.0028970867091880712, "loss": 3.4575, "step": 1500 }, { "epoch": 1.02, "learning_rate": 0.002845371487674539, "loss": 3.4469, "step": 2000 }, { "epoch": 1.27, "learning_rate": 0.0027936562661610067, "loss": 3.4404, "step": 2500 }, { "epoch": 1.53, "learning_rate": 0.0027419410446474744, "loss": 3.4604, "step": 3000 }, { "epoch": 1.78, "learning_rate": 0.002690225823133942, "loss": 3.4632, "step": 3500 }, { "epoch": 2.03, "learning_rate": 0.0026385106016204103, "loss": 3.4906, "step": 4000 }, { "epoch": 2.29, "learning_rate": 0.002586795380106878, "loss": 3.4523, "step": 4500 }, { "epoch": 2.54, "learning_rate": 0.002535080158593346, "loss": 3.4521, "step": 5000 }, { "epoch": 2.8, "learning_rate": 0.002483468367522841, "loss": 3.4765, "step": 5500 }, { "epoch": 3.05, "learning_rate": 0.0024318565764523356, "loss": 3.4482, "step": 6000 }, { "epoch": 3.3, "learning_rate": 0.0023801413549388037, "loss": 3.4663, "step": 6500 }, { "epoch": 3.56, "learning_rate": 0.0023284261334252715, "loss": 3.4747, "step": 7000 }, { "epoch": 3.81, "learning_rate": 0.002276710911911739, "loss": 3.4471, "step": 7500 }, { "epoch": 4.07, "learning_rate": 0.0022250991208412345, "loss": 3.4668, "step": 8000 }, { "epoch": 4.32, "learning_rate": 0.0021733838993277022, "loss": 3.4726, "step": 8500 }, { "epoch": 4.58, "learning_rate": 0.00212166867781417, "loss": 3.4374, "step": 9000 }, { "epoch": 4.83, "learning_rate": 0.0020699534563006377, "loss": 3.4422, "step": 9500 }, { "epoch": 5.08, "learning_rate": 0.002018238234787106, "loss": 3.4557, "step": 10000 }, { "epoch": 5.34, "learning_rate": 0.0019666264437166008, "loss": 3.4643, "step": 10500 }, { "epoch": 5.59, "learning_rate": 0.0019150146526460954, "loss": 3.4428, "step": 11000 }, { "epoch": 5.85, "learning_rate": 0.0018632994311325636, "loss": 3.4795, "step": 11500 }, { "epoch": 6.1, "learning_rate": 0.0018115842096190313, "loss": 3.4469, "step": 12000 }, { "epoch": 6.35, "learning_rate": 0.001759868988105499, "loss": 3.452, "step": 12500 }, { "epoch": 6.61, "learning_rate": 0.001708153766591967, "loss": 3.4436, "step": 13000 }, { "epoch": 6.86, "learning_rate": 0.0016564385450784347, "loss": 3.4325, "step": 13500 }, { "epoch": 7.12, "learning_rate": 0.0016048267540079296, "loss": 3.4705, "step": 14000 }, { "epoch": 7.37, "learning_rate": 0.0015531115324943976, "loss": 3.432, "step": 14500 }, { "epoch": 7.63, "learning_rate": 0.0015013963109808653, "loss": 3.4689, "step": 15000 }, { "epoch": 7.88, "learning_rate": 0.0014496810894673333, "loss": 3.4853, "step": 15500 }, { "epoch": 8.13, "learning_rate": 0.001397965867953801, "loss": 3.4548, "step": 16000 }, { "epoch": 8.39, "learning_rate": 0.001346354076883296, "loss": 3.4377, "step": 16500 }, { "epoch": 8.64, "learning_rate": 0.0012946388553697638, "loss": 3.4421, "step": 17000 }, { "epoch": 8.9, "learning_rate": 0.0012429236338562318, "loss": 3.437, "step": 17500 }, { "epoch": 9.15, "learning_rate": 0.0011912084123426995, "loss": 3.474, "step": 18000 }, { "epoch": 9.41, "learning_rate": 0.0011394931908291675, "loss": 3.4269, "step": 18500 }, { "epoch": 9.66, "learning_rate": 0.0010877779693156354, "loss": 3.4404, "step": 19000 }, { "epoch": 9.91, "learning_rate": 0.0010361661782451303, "loss": 3.4725, "step": 19500 }, { "epoch": 10.17, "learning_rate": 0.000984450956731598, "loss": 3.4633, "step": 20000 }, { "epoch": 10.42, "learning_rate": 0.0009327357352180659, "loss": 3.4325, "step": 20500 }, { "epoch": 10.68, "learning_rate": 0.0008810205137045338, "loss": 3.4446, "step": 21000 }, { "epoch": 10.93, "learning_rate": 0.0008293052921910016, "loss": 3.4433, "step": 21500 }, { "epoch": 11.18, "learning_rate": 0.0007775900706774695, "loss": 3.4662, "step": 22000 }, { "epoch": 11.44, "learning_rate": 0.0007258748491639372, "loss": 3.4421, "step": 22500 }, { "epoch": 11.69, "learning_rate": 0.0006742630580934321, "loss": 3.4358, "step": 23000 }, { "epoch": 11.95, "learning_rate": 0.0006225478365799, "loss": 3.456, "step": 23500 }, { "epoch": 12.2, "learning_rate": 0.0005708326150663679, "loss": 3.4475, "step": 24000 }, { "epoch": 12.46, "learning_rate": 0.0005191173935528358, "loss": 3.452, "step": 24500 }, { "epoch": 12.71, "learning_rate": 0.0004674021720393036, "loss": 3.4259, "step": 25000 }, { "epoch": 12.96, "learning_rate": 0.0004157903809687985, "loss": 3.4611, "step": 25500 }, { "epoch": 13.22, "learning_rate": 0.0003640751594552663, "loss": 3.4556, "step": 26000 }, { "epoch": 13.47, "learning_rate": 0.0003123599379417342, "loss": 3.4278, "step": 26500 }, { "epoch": 13.73, "learning_rate": 0.000260644716428202, "loss": 3.4602, "step": 27000 }, { "epoch": 13.98, "learning_rate": 0.0002089294949146699, "loss": 3.4503, "step": 27500 }, { "epoch": 14.23, "learning_rate": 0.00015721427340113775, "loss": 3.4479, "step": 28000 }, { "epoch": 14.49, "learning_rate": 0.00010560248233063266, "loss": 3.4541, "step": 28500 }, { "epoch": 14.74, "learning_rate": 5.38872608171005e-05, "loss": 3.4546, "step": 29000 }, { "epoch": 15.0, "learning_rate": 2.1720393035683503e-06, "loss": 3.4205, "step": 29500 }, { "epoch": 15.0, "step": 29505, "total_flos": 3.264158561621508e+18, "train_loss": 3.4646308027512136, "train_runtime": 5488.5308, "train_samples_per_second": 10.752, "train_steps_per_second": 5.376 } ], "max_steps": 29505, "num_train_epochs": 15, "total_flos": 3.264158561621508e+18, "trial_name": null, "trial_params": null }