{ "best_metric": 0.46745064854621887, "best_model_checkpoint": "secdisclosure-28l/checkpoint-1144", "epoch": 4.0, "eval_steps": 500, "global_step": 1144, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.35, "grad_norm": 6.856657028198242, "learning_rate": 6.993006993006993e-05, "loss": 3.041, "step": 100 }, { "epoch": 0.7, "grad_norm": 6.033031940460205, "learning_rate": 9.953357444566039e-05, "loss": 1.4738, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.7657342657342657, "eval_f1_macro": 0.7322810229739891, "eval_f1_micro": 0.7657342657342657, "eval_f1_weighted": 0.7319583902073576, "eval_loss": 0.7600058317184448, "eval_precision_macro": 0.7772121039163471, "eval_precision_micro": 0.7657342657342657, "eval_precision_weighted": 0.777820015465549, "eval_recall_macro": 0.7660714285714286, "eval_recall_micro": 0.7657342657342657, "eval_recall_weighted": 0.7657342657342657, "eval_runtime": 1.7527, "eval_samples_per_second": 326.349, "eval_steps_per_second": 20.539, "step": 286 }, { "epoch": 1.05, "grad_norm": 10.619550704956055, "learning_rate": 9.641839665080363e-05, "loss": 0.9378, "step": 300 }, { "epoch": 1.4, "grad_norm": 9.948221206665039, "learning_rate": 9.055104900071376e-05, "loss": 0.8129, "step": 400 }, { "epoch": 1.75, "grad_norm": 7.1508331298828125, "learning_rate": 8.22794094596864e-05, "loss": 0.7151, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.8479020979020979, "eval_f1_macro": 0.829473167087361, "eval_f1_micro": 0.8479020979020979, "eval_f1_weighted": 0.8300987833238631, "eval_loss": 0.5089119672775269, "eval_precision_macro": 0.836193234751419, "eval_precision_micro": 0.8479020979020979, "eval_precision_weighted": 0.8361691693898202, "eval_recall_macro": 0.8466836734693878, "eval_recall_micro": 0.8479020979020979, "eval_recall_weighted": 0.8479020979020979, "eval_runtime": 1.7558, "eval_samples_per_second": 325.77, "eval_steps_per_second": 20.503, "step": 572 }, { "epoch": 2.1, "grad_norm": 2.9531726837158203, "learning_rate": 7.209390765564318e-05, "loss": 0.5656, "step": 600 }, { "epoch": 2.45, "grad_norm": 10.758095741271973, "learning_rate": 6.0598447063242855e-05, "loss": 0.5336, "step": 700 }, { "epoch": 2.8, "grad_norm": 4.116607666015625, "learning_rate": 4.847459926610619e-05, "loss": 0.5038, "step": 800 }, { "epoch": 3.0, "eval_accuracy": 0.8531468531468531, "eval_f1_macro": 0.846184381645929, "eval_f1_micro": 0.8531468531468531, "eval_f1_weighted": 0.8463781132711151, "eval_loss": 0.5155654549598694, "eval_precision_macro": 0.8566878178421709, "eval_precision_micro": 0.8531468531468531, "eval_precision_weighted": 0.8564943001399524, "eval_recall_macro": 0.8526360544217687, "eval_recall_micro": 0.8531468531468531, "eval_recall_weighted": 0.8531468531468531, "eval_runtime": 1.7505, "eval_samples_per_second": 326.761, "eval_steps_per_second": 20.565, "step": 858 }, { "epoch": 3.15, "grad_norm": 1.0205471515655518, "learning_rate": 3.644119323817915e-05, "loss": 0.4418, "step": 900 }, { "epoch": 3.5, "grad_norm": 3.8727076053619385, "learning_rate": 2.5211695615868458e-05, "loss": 0.4395, "step": 1000 }, { "epoch": 3.85, "grad_norm": 6.586015224456787, "learning_rate": 1.5540241344564915e-05, "loss": 0.3997, "step": 1100 }, { "epoch": 4.0, "eval_accuracy": 0.8479020979020979, "eval_f1_macro": 0.8387454620589124, "eval_f1_micro": 0.8479020979020979, "eval_f1_weighted": 0.839048143374511, "eval_loss": 0.46745064854621887, "eval_precision_macro": 0.8497877221797597, "eval_precision_micro": 0.8479020979020979, "eval_precision_weighted": 0.849814874011776, "eval_recall_macro": 0.8473639455782312, "eval_recall_micro": 0.8479020979020979, "eval_recall_weighted": 0.8479020979020979, "eval_runtime": 1.7526, "eval_samples_per_second": 326.368, "eval_steps_per_second": 20.541, "step": 1144 } ], "logging_steps": 100, "max_steps": 1430, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 1210783301566464.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }