{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 25, "global_step": 294, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17006802721088435, "grad_norm": 1.6777013540267944, "learning_rate": 2.0833333333333336e-05, "loss": 0.6393, "step": 25 }, { "epoch": 0.17006802721088435, "eval_accuracy": 0.8759578544061303, "eval_auc": 0.9551645887113986, "eval_f1": 0.7757575757575758, "eval_loss": 0.4472915232181549, "eval_precision": 0.9032258064516129, "eval_recall": 0.6798179059180577, "eval_runtime": 3.1514, "eval_samples_per_second": 662.564, "eval_steps_per_second": 2.856, "step": 25 }, { "epoch": 0.3401360544217687, "grad_norm": 1.745863676071167, "learning_rate": 2.464764460404427e-05, "loss": 0.2377, "step": 50 }, { "epoch": 0.3401360544217687, "eval_accuracy": 0.9439655172413793, "eval_auc": 0.9875004114850522, "eval_f1": 0.9164882226980728, "eval_loss": 0.15234950184822083, "eval_precision": 0.8652291105121294, "eval_recall": 0.9742033383915023, "eval_runtime": 3.1506, "eval_samples_per_second": 662.733, "eval_steps_per_second": 2.857, "step": 50 }, { "epoch": 0.5102040816326531, "grad_norm": 2.064204454421997, "learning_rate": 2.3250178002596257e-05, "loss": 0.1276, "step": 75 }, { "epoch": 0.5102040816326531, "eval_accuracy": 0.9640804597701149, "eval_auc": 0.9937093227115326, "eval_f1": 0.9433106575963719, "eval_loss": 0.09969615936279297, "eval_precision": 0.9397590361445783, "eval_recall": 0.9468892261001517, "eval_runtime": 3.1599, "eval_samples_per_second": 660.782, "eval_steps_per_second": 2.848, "step": 75 }, { "epoch": 0.6802721088435374, "grad_norm": 1.5831186771392822, "learning_rate": 2.090825467126566e-05, "loss": 0.1035, "step": 100 }, { "epoch": 0.6802721088435374, "eval_accuracy": 0.9664750957854407, "eval_auc": 0.9949464326104294, "eval_f1": 0.9470499243570348, "eval_loss": 0.08703920990228653, "eval_precision": 0.9441930618401206, "eval_recall": 0.9499241274658573, "eval_runtime": 3.1587, "eval_samples_per_second": 661.03, "eval_steps_per_second": 2.849, "step": 100 }, { "epoch": 0.8503401360544217, "grad_norm": 1.721102237701416, "learning_rate": 1.7827624249789604e-05, "loss": 0.0727, "step": 125 }, { "epoch": 0.8503401360544217, "eval_accuracy": 0.9669540229885057, "eval_auc": 0.9950313843631433, "eval_f1": 0.9473684210526315, "eval_loss": 0.08850479125976562, "eval_precision": 0.9524539877300614, "eval_recall": 0.9423368740515933, "eval_runtime": 3.1669, "eval_samples_per_second": 659.316, "eval_steps_per_second": 2.842, "step": 125 }, { "epoch": 1.0204081632653061, "grad_norm": 1.1454665660858154, "learning_rate": 1.4278935478416066e-05, "loss": 0.0885, "step": 150 }, { "epoch": 1.0204081632653061, "eval_accuracy": 0.9712643678160919, "eval_auc": 0.9956770176837693, "eval_f1": 0.9541984732824428, "eval_loss": 0.08041754364967346, "eval_precision": 0.9600614439324117, "eval_recall": 0.9484066767830045, "eval_runtime": 3.1728, "eval_samples_per_second": 658.102, "eval_steps_per_second": 2.837, "step": 150 }, { "epoch": 1.1904761904761905, "grad_norm": 0.6928611397743225, "learning_rate": 1.0573958356820683e-05, "loss": 0.0614, "step": 175 }, { "epoch": 1.1904761904761905, "eval_accuracy": 0.9746168582375478, "eval_auc": 0.9961580569835119, "eval_f1": 0.9594491201224178, "eval_loss": 0.07542683929204941, "eval_precision": 0.9675925925925926, "eval_recall": 0.9514415781487102, "eval_runtime": 3.1686, "eval_samples_per_second": 658.971, "eval_steps_per_second": 2.84, "step": 175 }, { "epoch": 1.3605442176870748, "grad_norm": 0.8894994854927063, "learning_rate": 7.038193595383008e-06, "loss": 0.0448, "step": 200 }, { "epoch": 1.3605442176870748, "eval_accuracy": 0.9712643678160919, "eval_auc": 0.9961644283649654, "eval_f1": 0.9541984732824428, "eval_loss": 0.07568268477916718, "eval_precision": 0.9600614439324117, "eval_recall": 0.9484066767830045, "eval_runtime": 3.162, "eval_samples_per_second": 660.342, "eval_steps_per_second": 2.846, "step": 200 }, { "epoch": 1.5306122448979593, "grad_norm": 0.8013057708740234, "learning_rate": 3.98227575507636e-06, "loss": 0.0547, "step": 225 }, { "epoch": 1.5306122448979593, "eval_accuracy": 0.9717432950191571, "eval_auc": 0.9963555698085719, "eval_f1": 0.954858454475899, "eval_loss": 0.07406975328922272, "eval_precision": 0.9629629629629629, "eval_recall": 0.9468892261001517, "eval_runtime": 3.1666, "eval_samples_per_second": 659.381, "eval_steps_per_second": 2.842, "step": 225 }, { "epoch": 1.7006802721088436, "grad_norm": 1.3701196908950806, "learning_rate": 1.6746824526945163e-06, "loss": 0.048, "step": 250 }, { "epoch": 1.7006802721088436, "eval_accuracy": 0.9746168582375478, "eval_auc": 0.9967229861390597, "eval_f1": 0.9601203912716328, "eval_loss": 0.07189524918794632, "eval_precision": 0.9522388059701492, "eval_recall": 0.9681335356600911, "eval_runtime": 3.1579, "eval_samples_per_second": 661.202, "eval_steps_per_second": 2.85, "step": 250 }, { "epoch": 1.870748299319728, "grad_norm": 1.6245123147964478, "learning_rate": 3.181472637875868e-07, "loss": 0.0473, "step": 275 }, { "epoch": 1.870748299319728, "eval_accuracy": 0.975095785440613, "eval_auc": 0.9967537811494185, "eval_f1": 0.9606060606060606, "eval_loss": 0.07112736254930496, "eval_precision": 0.9591527987897126, "eval_recall": 0.9620637329286799, "eval_runtime": 3.1642, "eval_samples_per_second": 659.887, "eval_steps_per_second": 2.844, "step": 275 } ], "logging_steps": 25, "max_steps": 294, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4985013284110336.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }