{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.0, "eval_steps": 500, "global_step": 1875, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.7046678635547576, "eval_f1": 0.0, "eval_loss": 2.1335229873657227, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 0.4061, "eval_samples_per_second": 246.215, "eval_steps_per_second": 32.008, "step": 125 }, { "epoch": 2.0, "eval_accuracy": 0.7522441651705566, "eval_f1": 0.14012738853503184, "eval_loss": 2.0110883712768555, "eval_precision": 0.55, "eval_recall": 0.08029197080291971, "eval_runtime": 0.3251, "eval_samples_per_second": 307.591, "eval_steps_per_second": 39.987, "step": 250 }, { "epoch": 3.0, "eval_accuracy": 0.7630161579892281, "eval_f1": 0.19540229885057472, "eval_loss": 1.8779761791229248, "eval_precision": 0.4594594594594595, "eval_recall": 0.12408759124087591, "eval_runtime": 0.3807, "eval_samples_per_second": 262.671, "eval_steps_per_second": 34.147, "step": 375 }, { "epoch": 4.0, "grad_norm": 1.3616102933883667, "learning_rate": 1.4666666666666666e-05, "loss": 2.0165, "step": 500 }, { "epoch": 4.0, "eval_accuracy": 0.8788150807899462, "eval_f1": 0.55, "eval_loss": 1.7138484716415405, "eval_precision": 0.6407766990291263, "eval_recall": 0.48175182481751827, "eval_runtime": 0.3747, "eval_samples_per_second": 266.905, "eval_steps_per_second": 34.698, "step": 500 }, { "epoch": 5.0, "eval_accuracy": 0.8940754039497307, "eval_f1": 0.6439393939393939, "eval_loss": 1.5681630373001099, "eval_precision": 0.6692913385826772, "eval_recall": 0.6204379562043796, "eval_runtime": 0.4653, "eval_samples_per_second": 214.902, "eval_steps_per_second": 27.937, "step": 625 }, { "epoch": 6.0, "eval_accuracy": 0.9371633752244165, "eval_f1": 0.7970479704797049, "eval_loss": 1.4361474514007568, "eval_precision": 0.8059701492537313, "eval_recall": 0.7883211678832117, "eval_runtime": 0.4338, "eval_samples_per_second": 230.535, "eval_steps_per_second": 29.97, "step": 750 }, { "epoch": 7.0, "eval_accuracy": 0.9488330341113106, "eval_f1": 0.8475836431226766, "eval_loss": 1.3189666271209717, "eval_precision": 0.8636363636363636, "eval_recall": 0.8321167883211679, "eval_runtime": 0.462, "eval_samples_per_second": 216.445, "eval_steps_per_second": 28.138, "step": 875 }, { "epoch": 8.0, "grad_norm": 1.242187738418579, "learning_rate": 9.333333333333334e-06, "loss": 1.461, "step": 1000 }, { "epoch": 8.0, "eval_accuracy": 0.9605026929982047, "eval_f1": 0.8623188405797102, "eval_loss": 1.2221076488494873, "eval_precision": 0.8561151079136691, "eval_recall": 0.8686131386861314, "eval_runtime": 0.4073, "eval_samples_per_second": 245.517, "eval_steps_per_second": 31.917, "step": 1000 }, { "epoch": 9.0, "eval_accuracy": 0.966786355475763, "eval_f1": 0.9051094890510949, "eval_loss": 1.1402738094329834, "eval_precision": 0.9051094890510949, "eval_recall": 0.9051094890510949, "eval_runtime": 0.4198, "eval_samples_per_second": 238.187, "eval_steps_per_second": 30.964, "step": 1125 }, { "epoch": 10.0, "eval_accuracy": 0.966786355475763, "eval_f1": 0.8864468864468864, "eval_loss": 1.0731350183486938, "eval_precision": 0.8897058823529411, "eval_recall": 0.8832116788321168, "eval_runtime": 0.3515, "eval_samples_per_second": 284.456, "eval_steps_per_second": 36.979, "step": 1250 }, { "epoch": 11.0, "eval_accuracy": 0.9676840215439856, "eval_f1": 0.9025270758122744, "eval_loss": 1.0196468830108643, "eval_precision": 0.8928571428571429, "eval_recall": 0.9124087591240876, "eval_runtime": 0.414, "eval_samples_per_second": 241.566, "eval_steps_per_second": 31.404, "step": 1375 }, { "epoch": 12.0, "grad_norm": 1.1379125118255615, "learning_rate": 4.000000000000001e-06, "loss": 1.0954, "step": 1500 }, { "epoch": 12.0, "eval_accuracy": 0.9685816876122083, "eval_f1": 0.9057971014492754, "eval_loss": 0.9813730120658875, "eval_precision": 0.8992805755395683, "eval_recall": 0.9124087591240876, "eval_runtime": 0.3827, "eval_samples_per_second": 261.27, "eval_steps_per_second": 33.965, "step": 1500 }, { "epoch": 13.0, "eval_accuracy": 0.9703770197486535, "eval_f1": 0.9057971014492754, "eval_loss": 0.955005943775177, "eval_precision": 0.8992805755395683, "eval_recall": 0.9124087591240876, "eval_runtime": 0.4058, "eval_samples_per_second": 246.447, "eval_steps_per_second": 32.038, "step": 1625 }, { "epoch": 14.0, "eval_accuracy": 0.9694793536804309, "eval_f1": 0.9025270758122744, "eval_loss": 0.9408783912658691, "eval_precision": 0.8928571428571429, "eval_recall": 0.9124087591240876, "eval_runtime": 0.3995, "eval_samples_per_second": 250.335, "eval_steps_per_second": 32.544, "step": 1750 }, { "epoch": 15.0, "eval_accuracy": 0.9703770197486535, "eval_f1": 0.9057971014492754, "eval_loss": 0.9354953169822693, "eval_precision": 0.8992805755395683, "eval_recall": 0.9124087591240876, "eval_runtime": 0.4049, "eval_samples_per_second": 246.945, "eval_steps_per_second": 32.103, "step": 1875 } ], "logging_steps": 500, "max_steps": 1875, "num_input_tokens_seen": 0, "num_train_epochs": 15, "save_steps": 500, "total_flos": 153789736715280.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }