sfarrukh's picture
End of training
0dabb72 verified
[
{
"loss": 0.9641,
"grad_norm": 12.902933120727539,
"learning_rate": 1.8e-05,
"epoch": 1.0,
"step": 250
},
{
"eval_loss": 0.6194158792495728,
"eval_accuracy": 0.792,
"eval_f1_score": 0.7818567350710909,
"eval_runtime": 4.3481,
"eval_samples_per_second": 459.97,
"eval_steps_per_second": 7.36,
"epoch": 1.0,
"step": 250
},
{
"loss": 0.4398,
"grad_norm": 8.209343910217285,
"learning_rate": 1.6000000000000003e-05,
"epoch": 2.0,
"step": 500
},
{
"eval_loss": 0.33885374665260315,
"eval_accuracy": 0.883,
"eval_f1_score": 0.8825473908240207,
"eval_runtime": 4.3879,
"eval_samples_per_second": 455.804,
"eval_steps_per_second": 7.293,
"epoch": 2.0,
"step": 500
},
{
"loss": 0.258,
"grad_norm": 7.1059184074401855,
"learning_rate": 1.4e-05,
"epoch": 3.0,
"step": 750
},
{
"eval_loss": 0.2948116958141327,
"eval_accuracy": 0.8945,
"eval_f1_score": 0.8950761079856652,
"eval_runtime": 4.3338,
"eval_samples_per_second": 461.493,
"eval_steps_per_second": 7.384,
"epoch": 3.0,
"step": 750
},
{
"loss": 0.1744,
"grad_norm": 14.858270645141602,
"learning_rate": 1.2e-05,
"epoch": 4.0,
"step": 1000
},
{
"eval_loss": 0.28408634662628174,
"eval_accuracy": 0.9035,
"eval_f1_score": 0.9038343567745306,
"eval_runtime": 4.2528,
"eval_samples_per_second": 470.273,
"eval_steps_per_second": 7.524,
"epoch": 4.0,
"step": 1000
},
{
"loss": 0.132,
"grad_norm": 10.98675537109375,
"learning_rate": 1e-05,
"epoch": 5.0,
"step": 1250
},
{
"eval_loss": 0.29371485114097595,
"eval_accuracy": 0.8985,
"eval_f1_score": 0.8982752344158218,
"eval_runtime": 4.368,
"eval_samples_per_second": 457.874,
"eval_steps_per_second": 7.326,
"epoch": 5.0,
"step": 1250
},
{
"loss": 0.1078,
"grad_norm": 1.496416449546814,
"learning_rate": 8.000000000000001e-06,
"epoch": 6.0,
"step": 1500
},
{
"eval_loss": 0.27695581316947937,
"eval_accuracy": 0.9055,
"eval_f1_score": 0.9053682374738501,
"eval_runtime": 4.2619,
"eval_samples_per_second": 469.274,
"eval_steps_per_second": 7.508,
"epoch": 6.0,
"step": 1500
},
{
"loss": 0.0888,
"grad_norm": 2.4278478622436523,
"learning_rate": 6e-06,
"epoch": 7.0,
"step": 1750
},
{
"eval_loss": 0.3017047643661499,
"eval_accuracy": 0.903,
"eval_f1_score": 0.9027803797533116,
"eval_runtime": 4.3556,
"eval_samples_per_second": 459.177,
"eval_steps_per_second": 7.347,
"epoch": 7.0,
"step": 1750
},
{
"loss": 0.0739,
"grad_norm": 2.9510045051574707,
"learning_rate": 4.000000000000001e-06,
"epoch": 8.0,
"step": 2000
},
{
"eval_loss": 0.28290167450904846,
"eval_accuracy": 0.9095,
"eval_f1_score": 0.9096314060750578,
"eval_runtime": 4.3333,
"eval_samples_per_second": 461.542,
"eval_steps_per_second": 7.385,
"epoch": 8.0,
"step": 2000
},
{
"loss": 0.0611,
"grad_norm": 9.680631637573242,
"learning_rate": 2.0000000000000003e-06,
"epoch": 9.0,
"step": 2250
},
{
"eval_loss": 0.306204229593277,
"eval_accuracy": 0.91,
"eval_f1_score": 0.9101550513849552,
"eval_runtime": 4.3144,
"eval_samples_per_second": 463.564,
"eval_steps_per_second": 7.417,
"epoch": 9.0,
"step": 2250
},
{
"loss": 0.0506,
"grad_norm": 3.266329050064087,
"learning_rate": 0.0,
"epoch": 10.0,
"step": 2500
},
{
"eval_loss": 0.3068402111530304,
"eval_accuracy": 0.9085,
"eval_f1_score": 0.9086081968965166,
"eval_runtime": 4.2835,
"eval_samples_per_second": 466.913,
"eval_steps_per_second": 7.471,
"epoch": 10.0,
"step": 2500
},
{
"train_runtime": 1334.5966,
"train_samples_per_second": 119.886,
"train_steps_per_second": 1.873,
"total_flos": 3891507413760000.0,
"train_loss": 0.23506236724853516,
"epoch": 10.0,
"step": 2500
},
{
"eval_loss": 0.3068402111530304,
"eval_accuracy": 0.9085,
"eval_f1_score": 0.9086081968965166,
"eval_runtime": 4.7892,
"eval_samples_per_second": 417.606,
"eval_steps_per_second": 6.682,
"epoch": 10.0,
"step": 2500
}
]