|
[ |
|
{ |
|
"loss": 0.9641, |
|
"grad_norm": 12.902933120727539, |
|
"learning_rate": 1.8e-05, |
|
"epoch": 1.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"eval_loss": 0.6194158792495728, |
|
"eval_accuracy": 0.792, |
|
"eval_f1_score": 0.7818567350710909, |
|
"eval_runtime": 4.3481, |
|
"eval_samples_per_second": 459.97, |
|
"eval_steps_per_second": 7.36, |
|
"epoch": 1.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"loss": 0.4398, |
|
"grad_norm": 8.209343910217285, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"epoch": 2.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"eval_loss": 0.33885374665260315, |
|
"eval_accuracy": 0.883, |
|
"eval_f1_score": 0.8825473908240207, |
|
"eval_runtime": 4.3879, |
|
"eval_samples_per_second": 455.804, |
|
"eval_steps_per_second": 7.293, |
|
"epoch": 2.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"loss": 0.258, |
|
"grad_norm": 7.1059184074401855, |
|
"learning_rate": 1.4e-05, |
|
"epoch": 3.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"eval_loss": 0.2948116958141327, |
|
"eval_accuracy": 0.8945, |
|
"eval_f1_score": 0.8950761079856652, |
|
"eval_runtime": 4.3338, |
|
"eval_samples_per_second": 461.493, |
|
"eval_steps_per_second": 7.384, |
|
"epoch": 3.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"loss": 0.1744, |
|
"grad_norm": 14.858270645141602, |
|
"learning_rate": 1.2e-05, |
|
"epoch": 4.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"eval_loss": 0.28408634662628174, |
|
"eval_accuracy": 0.9035, |
|
"eval_f1_score": 0.9038343567745306, |
|
"eval_runtime": 4.2528, |
|
"eval_samples_per_second": 470.273, |
|
"eval_steps_per_second": 7.524, |
|
"epoch": 4.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"loss": 0.132, |
|
"grad_norm": 10.98675537109375, |
|
"learning_rate": 1e-05, |
|
"epoch": 5.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"eval_loss": 0.29371485114097595, |
|
"eval_accuracy": 0.8985, |
|
"eval_f1_score": 0.8982752344158218, |
|
"eval_runtime": 4.368, |
|
"eval_samples_per_second": 457.874, |
|
"eval_steps_per_second": 7.326, |
|
"epoch": 5.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"loss": 0.1078, |
|
"grad_norm": 1.496416449546814, |
|
"learning_rate": 8.000000000000001e-06, |
|
"epoch": 6.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"eval_loss": 0.27695581316947937, |
|
"eval_accuracy": 0.9055, |
|
"eval_f1_score": 0.9053682374738501, |
|
"eval_runtime": 4.2619, |
|
"eval_samples_per_second": 469.274, |
|
"eval_steps_per_second": 7.508, |
|
"epoch": 6.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"loss": 0.0888, |
|
"grad_norm": 2.4278478622436523, |
|
"learning_rate": 6e-06, |
|
"epoch": 7.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"eval_loss": 0.3017047643661499, |
|
"eval_accuracy": 0.903, |
|
"eval_f1_score": 0.9027803797533116, |
|
"eval_runtime": 4.3556, |
|
"eval_samples_per_second": 459.177, |
|
"eval_steps_per_second": 7.347, |
|
"epoch": 7.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"loss": 0.0739, |
|
"grad_norm": 2.9510045051574707, |
|
"learning_rate": 4.000000000000001e-06, |
|
"epoch": 8.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"eval_loss": 0.28290167450904846, |
|
"eval_accuracy": 0.9095, |
|
"eval_f1_score": 0.9096314060750578, |
|
"eval_runtime": 4.3333, |
|
"eval_samples_per_second": 461.542, |
|
"eval_steps_per_second": 7.385, |
|
"epoch": 8.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"loss": 0.0611, |
|
"grad_norm": 9.680631637573242, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"epoch": 9.0, |
|
"step": 2250 |
|
}, |
|
{ |
|
"eval_loss": 0.306204229593277, |
|
"eval_accuracy": 0.91, |
|
"eval_f1_score": 0.9101550513849552, |
|
"eval_runtime": 4.3144, |
|
"eval_samples_per_second": 463.564, |
|
"eval_steps_per_second": 7.417, |
|
"epoch": 9.0, |
|
"step": 2250 |
|
}, |
|
{ |
|
"loss": 0.0506, |
|
"grad_norm": 3.266329050064087, |
|
"learning_rate": 0.0, |
|
"epoch": 10.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"eval_loss": 0.3068402111530304, |
|
"eval_accuracy": 0.9085, |
|
"eval_f1_score": 0.9086081968965166, |
|
"eval_runtime": 4.2835, |
|
"eval_samples_per_second": 466.913, |
|
"eval_steps_per_second": 7.471, |
|
"epoch": 10.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"train_runtime": 1334.5966, |
|
"train_samples_per_second": 119.886, |
|
"train_steps_per_second": 1.873, |
|
"total_flos": 3891507413760000.0, |
|
"train_loss": 0.23506236724853516, |
|
"epoch": 10.0, |
|
"step": 2500 |
|
}, |
|
{ |
|
"eval_loss": 0.3068402111530304, |
|
"eval_accuracy": 0.9085, |
|
"eval_f1_score": 0.9086081968965166, |
|
"eval_runtime": 4.7892, |
|
"eval_samples_per_second": 417.606, |
|
"eval_steps_per_second": 6.682, |
|
"epoch": 10.0, |
|
"step": 2500 |
|
} |
|
] |