|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"global_step": 29505, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.002976, |
|
"loss": 4.0956, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0029488019307016035, |
|
"loss": 3.522, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0028970867091880712, |
|
"loss": 3.4575, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.002845371487674539, |
|
"loss": 3.4469, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0027936562661610067, |
|
"loss": 3.4404, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0027419410446474744, |
|
"loss": 3.4604, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.002690225823133942, |
|
"loss": 3.4632, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.0026385106016204103, |
|
"loss": 3.4906, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.002586795380106878, |
|
"loss": 3.4523, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.002535080158593346, |
|
"loss": 3.4521, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.002483468367522841, |
|
"loss": 3.4765, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 0.0024318565764523356, |
|
"loss": 3.4482, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0023801413549388037, |
|
"loss": 3.4663, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.0023284261334252715, |
|
"loss": 3.4747, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 0.002276710911911739, |
|
"loss": 3.4471, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 4.07, |
|
"learning_rate": 0.0022250991208412345, |
|
"loss": 3.4668, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 0.0021733838993277022, |
|
"loss": 3.4726, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 0.00212166867781417, |
|
"loss": 3.4374, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.83, |
|
"learning_rate": 0.0020699534563006377, |
|
"loss": 3.4422, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 0.002018238234787106, |
|
"loss": 3.4557, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 0.0019666264437166008, |
|
"loss": 3.4643, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 0.0019150146526460954, |
|
"loss": 3.4428, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.0018632994311325636, |
|
"loss": 3.4795, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 0.0018115842096190313, |
|
"loss": 3.4469, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 0.001759868988105499, |
|
"loss": 3.452, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 6.61, |
|
"learning_rate": 0.001708153766591967, |
|
"loss": 3.4436, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 0.0016564385450784347, |
|
"loss": 3.4325, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 0.0016048267540079296, |
|
"loss": 3.4705, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 0.0015531115324943976, |
|
"loss": 3.432, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 0.0015013963109808653, |
|
"loss": 3.4689, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 0.0014496810894673333, |
|
"loss": 3.4853, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"learning_rate": 0.001397965867953801, |
|
"loss": 3.4548, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 0.001346354076883296, |
|
"loss": 3.4377, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 0.0012946388553697638, |
|
"loss": 3.4421, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 0.0012429236338562318, |
|
"loss": 3.437, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 9.15, |
|
"learning_rate": 0.0011912084123426995, |
|
"loss": 3.474, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 0.0011394931908291675, |
|
"loss": 3.4269, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 9.66, |
|
"learning_rate": 0.0010877779693156354, |
|
"loss": 3.4404, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 0.0010361661782451303, |
|
"loss": 3.4725, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 0.000984450956731598, |
|
"loss": 3.4633, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 0.0009327357352180659, |
|
"loss": 3.4325, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 0.0008810205137045338, |
|
"loss": 3.4446, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 0.0008293052921910016, |
|
"loss": 3.4433, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 11.18, |
|
"learning_rate": 0.0007775900706774695, |
|
"loss": 3.4662, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 0.0007258748491639372, |
|
"loss": 3.4421, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 11.69, |
|
"learning_rate": 0.0006742630580934321, |
|
"loss": 3.4358, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 0.0006225478365799, |
|
"loss": 3.456, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 0.0005708326150663679, |
|
"loss": 3.4475, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 0.0005191173935528358, |
|
"loss": 3.452, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 0.0004674021720393036, |
|
"loss": 3.4259, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 0.0004157903809687985, |
|
"loss": 3.4611, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 13.22, |
|
"learning_rate": 0.0003640751594552663, |
|
"loss": 3.4556, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 0.0003123599379417342, |
|
"loss": 3.4278, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 0.000260644716428202, |
|
"loss": 3.4602, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"learning_rate": 0.0002089294949146699, |
|
"loss": 3.4503, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 14.23, |
|
"learning_rate": 0.00015721427340113775, |
|
"loss": 3.4479, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 0.00010560248233063266, |
|
"loss": 3.4541, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 5.38872608171005e-05, |
|
"loss": 3.4546, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 2.1720393035683503e-06, |
|
"loss": 3.4205, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"step": 29505, |
|
"total_flos": 3.264158561621508e+18, |
|
"train_loss": 3.4646308027512136, |
|
"train_runtime": 5488.5308, |
|
"train_samples_per_second": 10.752, |
|
"train_steps_per_second": 5.376 |
|
} |
|
], |
|
"max_steps": 29505, |
|
"num_train_epochs": 15, |
|
"total_flos": 3.264158561621508e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|