secdisclosure-28l / checkpoint-1144 /trainer_state.json
lomov's picture
Upload folder using huggingface_hub
0ade65e verified
{
"best_metric": 0.46745064854621887,
"best_model_checkpoint": "secdisclosure-28l/checkpoint-1144",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 1144,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.35,
"grad_norm": 6.856657028198242,
"learning_rate": 6.993006993006993e-05,
"loss": 3.041,
"step": 100
},
{
"epoch": 0.7,
"grad_norm": 6.033031940460205,
"learning_rate": 9.953357444566039e-05,
"loss": 1.4738,
"step": 200
},
{
"epoch": 1.0,
"eval_accuracy": 0.7657342657342657,
"eval_f1_macro": 0.7322810229739891,
"eval_f1_micro": 0.7657342657342657,
"eval_f1_weighted": 0.7319583902073576,
"eval_loss": 0.7600058317184448,
"eval_precision_macro": 0.7772121039163471,
"eval_precision_micro": 0.7657342657342657,
"eval_precision_weighted": 0.777820015465549,
"eval_recall_macro": 0.7660714285714286,
"eval_recall_micro": 0.7657342657342657,
"eval_recall_weighted": 0.7657342657342657,
"eval_runtime": 1.7527,
"eval_samples_per_second": 326.349,
"eval_steps_per_second": 20.539,
"step": 286
},
{
"epoch": 1.05,
"grad_norm": 10.619550704956055,
"learning_rate": 9.641839665080363e-05,
"loss": 0.9378,
"step": 300
},
{
"epoch": 1.4,
"grad_norm": 9.948221206665039,
"learning_rate": 9.055104900071376e-05,
"loss": 0.8129,
"step": 400
},
{
"epoch": 1.75,
"grad_norm": 7.1508331298828125,
"learning_rate": 8.22794094596864e-05,
"loss": 0.7151,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.8479020979020979,
"eval_f1_macro": 0.829473167087361,
"eval_f1_micro": 0.8479020979020979,
"eval_f1_weighted": 0.8300987833238631,
"eval_loss": 0.5089119672775269,
"eval_precision_macro": 0.836193234751419,
"eval_precision_micro": 0.8479020979020979,
"eval_precision_weighted": 0.8361691693898202,
"eval_recall_macro": 0.8466836734693878,
"eval_recall_micro": 0.8479020979020979,
"eval_recall_weighted": 0.8479020979020979,
"eval_runtime": 1.7558,
"eval_samples_per_second": 325.77,
"eval_steps_per_second": 20.503,
"step": 572
},
{
"epoch": 2.1,
"grad_norm": 2.9531726837158203,
"learning_rate": 7.209390765564318e-05,
"loss": 0.5656,
"step": 600
},
{
"epoch": 2.45,
"grad_norm": 10.758095741271973,
"learning_rate": 6.0598447063242855e-05,
"loss": 0.5336,
"step": 700
},
{
"epoch": 2.8,
"grad_norm": 4.116607666015625,
"learning_rate": 4.847459926610619e-05,
"loss": 0.5038,
"step": 800
},
{
"epoch": 3.0,
"eval_accuracy": 0.8531468531468531,
"eval_f1_macro": 0.846184381645929,
"eval_f1_micro": 0.8531468531468531,
"eval_f1_weighted": 0.8463781132711151,
"eval_loss": 0.5155654549598694,
"eval_precision_macro": 0.8566878178421709,
"eval_precision_micro": 0.8531468531468531,
"eval_precision_weighted": 0.8564943001399524,
"eval_recall_macro": 0.8526360544217687,
"eval_recall_micro": 0.8531468531468531,
"eval_recall_weighted": 0.8531468531468531,
"eval_runtime": 1.7505,
"eval_samples_per_second": 326.761,
"eval_steps_per_second": 20.565,
"step": 858
},
{
"epoch": 3.15,
"grad_norm": 1.0205471515655518,
"learning_rate": 3.644119323817915e-05,
"loss": 0.4418,
"step": 900
},
{
"epoch": 3.5,
"grad_norm": 3.8727076053619385,
"learning_rate": 2.5211695615868458e-05,
"loss": 0.4395,
"step": 1000
},
{
"epoch": 3.85,
"grad_norm": 6.586015224456787,
"learning_rate": 1.5540241344564915e-05,
"loss": 0.3997,
"step": 1100
},
{
"epoch": 4.0,
"eval_accuracy": 0.8479020979020979,
"eval_f1_macro": 0.8387454620589124,
"eval_f1_micro": 0.8479020979020979,
"eval_f1_weighted": 0.839048143374511,
"eval_loss": 0.46745064854621887,
"eval_precision_macro": 0.8497877221797597,
"eval_precision_micro": 0.8479020979020979,
"eval_precision_weighted": 0.849814874011776,
"eval_recall_macro": 0.8473639455782312,
"eval_recall_micro": 0.8479020979020979,
"eval_recall_weighted": 0.8479020979020979,
"eval_runtime": 1.7526,
"eval_samples_per_second": 326.368,
"eval_steps_per_second": 20.541,
"step": 1144
}
],
"logging_steps": 100,
"max_steps": 1430,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"total_flos": 1210783301566464.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}