|
{ |
|
"best_metric": 0.46745064854621887, |
|
"best_model_checkpoint": "secdisclosure-28l/checkpoint-1144", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 1144, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 6.856657028198242, |
|
"learning_rate": 6.993006993006993e-05, |
|
"loss": 3.041, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 6.033031940460205, |
|
"learning_rate": 9.953357444566039e-05, |
|
"loss": 1.4738, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7657342657342657, |
|
"eval_f1_macro": 0.7322810229739891, |
|
"eval_f1_micro": 0.7657342657342657, |
|
"eval_f1_weighted": 0.7319583902073576, |
|
"eval_loss": 0.7600058317184448, |
|
"eval_precision_macro": 0.7772121039163471, |
|
"eval_precision_micro": 0.7657342657342657, |
|
"eval_precision_weighted": 0.777820015465549, |
|
"eval_recall_macro": 0.7660714285714286, |
|
"eval_recall_micro": 0.7657342657342657, |
|
"eval_recall_weighted": 0.7657342657342657, |
|
"eval_runtime": 1.7527, |
|
"eval_samples_per_second": 326.349, |
|
"eval_steps_per_second": 20.539, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 10.619550704956055, |
|
"learning_rate": 9.641839665080363e-05, |
|
"loss": 0.9378, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 9.948221206665039, |
|
"learning_rate": 9.055104900071376e-05, |
|
"loss": 0.8129, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 7.1508331298828125, |
|
"learning_rate": 8.22794094596864e-05, |
|
"loss": 0.7151, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8479020979020979, |
|
"eval_f1_macro": 0.829473167087361, |
|
"eval_f1_micro": 0.8479020979020979, |
|
"eval_f1_weighted": 0.8300987833238631, |
|
"eval_loss": 0.5089119672775269, |
|
"eval_precision_macro": 0.836193234751419, |
|
"eval_precision_micro": 0.8479020979020979, |
|
"eval_precision_weighted": 0.8361691693898202, |
|
"eval_recall_macro": 0.8466836734693878, |
|
"eval_recall_micro": 0.8479020979020979, |
|
"eval_recall_weighted": 0.8479020979020979, |
|
"eval_runtime": 1.7558, |
|
"eval_samples_per_second": 325.77, |
|
"eval_steps_per_second": 20.503, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 2.9531726837158203, |
|
"learning_rate": 7.209390765564318e-05, |
|
"loss": 0.5656, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 10.758095741271973, |
|
"learning_rate": 6.0598447063242855e-05, |
|
"loss": 0.5336, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 4.116607666015625, |
|
"learning_rate": 4.847459926610619e-05, |
|
"loss": 0.5038, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8531468531468531, |
|
"eval_f1_macro": 0.846184381645929, |
|
"eval_f1_micro": 0.8531468531468531, |
|
"eval_f1_weighted": 0.8463781132711151, |
|
"eval_loss": 0.5155654549598694, |
|
"eval_precision_macro": 0.8566878178421709, |
|
"eval_precision_micro": 0.8531468531468531, |
|
"eval_precision_weighted": 0.8564943001399524, |
|
"eval_recall_macro": 0.8526360544217687, |
|
"eval_recall_micro": 0.8531468531468531, |
|
"eval_recall_weighted": 0.8531468531468531, |
|
"eval_runtime": 1.7505, |
|
"eval_samples_per_second": 326.761, |
|
"eval_steps_per_second": 20.565, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"grad_norm": 1.0205471515655518, |
|
"learning_rate": 3.644119323817915e-05, |
|
"loss": 0.4418, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 3.8727076053619385, |
|
"learning_rate": 2.5211695615868458e-05, |
|
"loss": 0.4395, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"grad_norm": 6.586015224456787, |
|
"learning_rate": 1.5540241344564915e-05, |
|
"loss": 0.3997, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8479020979020979, |
|
"eval_f1_macro": 0.8387454620589124, |
|
"eval_f1_micro": 0.8479020979020979, |
|
"eval_f1_weighted": 0.839048143374511, |
|
"eval_loss": 0.46745064854621887, |
|
"eval_precision_macro": 0.8497877221797597, |
|
"eval_precision_micro": 0.8479020979020979, |
|
"eval_precision_weighted": 0.849814874011776, |
|
"eval_recall_macro": 0.8473639455782312, |
|
"eval_recall_micro": 0.8479020979020979, |
|
"eval_recall_weighted": 0.8479020979020979, |
|
"eval_runtime": 1.7526, |
|
"eval_samples_per_second": 326.368, |
|
"eval_steps_per_second": 20.541, |
|
"step": 1144 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 1430, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 1210783301566464.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|