{ "best_metric": 0.8648728562980484, "best_model_checkpoint": "./models/entities/test_v4/checkpoint-3500", "epoch": 9.979267449896337, "eval_steps": 500, "global_step": 3610, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.38217000691085, "grad_norm": 0.9774827361106873, "learning_rate": 0.0002, "loss": 0.5764, "step": 500 }, { "epoch": 1.38217000691085, "eval_accuracy": 0.9593812772012053, "eval_f1": 0.7594292979234355, "eval_loss": 0.1411387324333191, "eval_precision": 0.708673872923807, "eval_recall": 0.8180158247108947, "eval_runtime": 199.3049, "eval_samples_per_second": 24.886, "eval_steps_per_second": 24.886, "step": 500 }, { "epoch": 2.7643400138217, "grad_norm": 0.7694200873374939, "learning_rate": 0.00016784565916398716, "loss": 0.09, "step": 1000 }, { "epoch": 2.7643400138217, "eval_accuracy": 0.9687447689989956, "eval_f1": 0.8259538274605103, "eval_loss": 0.11496158689260483, "eval_precision": 0.792852477372399, "eval_recall": 0.8619395414891459, "eval_runtime": 48.3856, "eval_samples_per_second": 102.51, "eval_steps_per_second": 102.51, "step": 1000 }, { "epoch": 4.14651002073255, "grad_norm": 0.625320315361023, "learning_rate": 0.0001356913183279743, "loss": 0.0463, "step": 1500 }, { "epoch": 4.14651002073255, "eval_accuracy": 0.9703716103113492, "eval_f1": 0.8380395025603511, "eval_loss": 0.13039253652095795, "eval_precision": 0.8069878839109609, "eval_recall": 0.8715763846622033, "eval_runtime": 46.9044, "eval_samples_per_second": 105.747, "eval_steps_per_second": 105.747, "step": 1500 }, { "epoch": 5.5286800276434, "grad_norm": 0.4629702866077423, "learning_rate": 0.00010353697749196143, "loss": 0.023, "step": 2000 }, { "epoch": 5.5286800276434, "eval_accuracy": 0.9716950535654503, "eval_f1": 0.8417248620403379, "eval_loss": 0.1349513679742813, "eval_precision": 0.8115641774178359, "eval_recall": 0.8742138364779874, "eval_runtime": 47.4889, "eval_samples_per_second": 104.446, "eval_steps_per_second": 104.446, "step": 2000 }, { "epoch": 6.91085003455425, "grad_norm": 0.1776706427335739, "learning_rate": 7.138263665594856e-05, "loss": 0.0147, "step": 2500 }, { "epoch": 6.91085003455425, "eval_accuracy": 0.9736305239370606, "eval_f1": 0.8591340450771056, "eval_loss": 0.14083334803581238, "eval_precision": 0.8377674956622325, "eval_recall": 0.8816189896530736, "eval_runtime": 47.4474, "eval_samples_per_second": 104.537, "eval_steps_per_second": 104.537, "step": 2500 }, { "epoch": 8.2930200414651, "grad_norm": 0.7263190746307373, "learning_rate": 3.92282958199357e-05, "loss": 0.0078, "step": 3000 }, { "epoch": 8.2930200414651, "eval_accuracy": 0.9740594660194175, "eval_f1": 0.8621834580504485, "eval_loss": 0.14879639446735382, "eval_precision": 0.8428446855924814, "eval_recall": 0.8824305132886995, "eval_runtime": 49.4794, "eval_samples_per_second": 100.244, "eval_steps_per_second": 100.244, "step": 3000 }, { "epoch": 9.675190048375951, "grad_norm": 0.21800673007965088, "learning_rate": 7.07395498392283e-06, "loss": 0.0045, "step": 3500 }, { "epoch": 9.675190048375951, "eval_accuracy": 0.9743680950786743, "eval_f1": 0.8648728562980484, "eval_loss": 0.16495844721794128, "eval_precision": 0.8410005750431282, "eval_recall": 0.8901399878271454, "eval_runtime": 46.0436, "eval_samples_per_second": 107.724, "eval_steps_per_second": 107.724, "step": 3500 }, { "epoch": 9.979267449896337, "step": 3610, "total_flos": 6112468258365600.0, "train_loss": 0.10573644003214268, "train_runtime": 1226.1602, "train_samples_per_second": 188.784, "train_steps_per_second": 2.944 } ], "logging_steps": 500, "max_steps": 3610, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6112468258365600.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }