{ "best_metric": 0.6880093812942505, "best_model_checkpoint": "/content/drive/MyDrive/NLP/HW_2/LORA_AUG_GREATER/checkpoint-2250", "epoch": 1.027749229188078, "eval_steps": 250, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08564576909900651, "grad_norm": 13.67185115814209, "learning_rate": 4.9143542309009935e-05, "loss": 0.968, "step": 250 }, { "epoch": 0.08564576909900651, "eval_accuracy": 0.6621503496503497, "eval_f1": 0.6578961107049218, "eval_loss": 0.7989072799682617, "eval_precision": 0.6884699634047233, "eval_recall": 0.6621503496503497, "eval_runtime": 154.1603, "eval_samples_per_second": 14.842, "eval_steps_per_second": 0.467, "step": 250 }, { "epoch": 0.17129153819801302, "grad_norm": 6.074775218963623, "learning_rate": 4.8287084618019874e-05, "loss": 0.6005, "step": 500 }, { "epoch": 0.17129153819801302, "eval_accuracy": 0.7145979020979021, "eval_f1": 0.7094250928713455, "eval_loss": 0.7962299585342407, "eval_precision": 0.7248927170802172, "eval_recall": 0.7145979020979021, "eval_runtime": 156.9154, "eval_samples_per_second": 14.581, "eval_steps_per_second": 0.459, "step": 500 }, { "epoch": 0.2569373072970195, "grad_norm": 4.536829471588135, "learning_rate": 4.7430626927029806e-05, "loss": 0.5234, "step": 750 }, { "epoch": 0.2569373072970195, "eval_accuracy": 0.7447552447552448, "eval_f1": 0.7415547688528572, "eval_loss": 0.7522953152656555, "eval_precision": 0.7440074461201359, "eval_recall": 0.7447552447552448, "eval_runtime": 156.5246, "eval_samples_per_second": 14.618, "eval_steps_per_second": 0.46, "step": 750 }, { "epoch": 0.34258307639602603, "grad_norm": 6.346406936645508, "learning_rate": 4.6574169236039745e-05, "loss": 0.4843, "step": 1000 }, { "epoch": 0.34258307639602603, "eval_accuracy": 0.7097902097902098, "eval_f1": 0.6943907906472824, "eval_loss": 0.9078171849250793, "eval_precision": 0.7203113605498284, "eval_recall": 0.7097902097902098, "eval_runtime": 156.7861, "eval_samples_per_second": 14.593, "eval_steps_per_second": 0.459, "step": 1000 }, { "epoch": 0.42822884549503254, "grad_norm": 4.173954486846924, "learning_rate": 4.571771154504968e-05, "loss": 0.4593, "step": 1250 }, { "epoch": 0.42822884549503254, "eval_accuracy": 0.729458041958042, "eval_f1": 0.7221199642365762, "eval_loss": 0.8215415477752686, "eval_precision": 0.739759876127429, "eval_recall": 0.729458041958042, "eval_runtime": 156.9233, "eval_samples_per_second": 14.58, "eval_steps_per_second": 0.459, "step": 1250 }, { "epoch": 0.513874614594039, "grad_norm": 5.52806282043457, "learning_rate": 4.486125385405961e-05, "loss": 0.4274, "step": 1500 }, { "epoch": 0.513874614594039, "eval_accuracy": 0.736013986013986, "eval_f1": 0.7246914620886431, "eval_loss": 0.8586153388023376, "eval_precision": 0.7426058097276803, "eval_recall": 0.736013986013986, "eval_runtime": 154.5125, "eval_samples_per_second": 14.808, "eval_steps_per_second": 0.466, "step": 1500 }, { "epoch": 0.5995203836930456, "grad_norm": 4.5602898597717285, "learning_rate": 4.400479616306955e-05, "loss": 0.4272, "step": 1750 }, { "epoch": 0.5995203836930456, "eval_accuracy": 0.7596153846153846, "eval_f1": 0.75381427721012, "eval_loss": 0.7090545296669006, "eval_precision": 0.7584790142699676, "eval_recall": 0.7596153846153846, "eval_runtime": 154.0482, "eval_samples_per_second": 14.852, "eval_steps_per_second": 0.467, "step": 1750 }, { "epoch": 0.6851661527920521, "grad_norm": 4.625553607940674, "learning_rate": 4.314833847207948e-05, "loss": 0.3975, "step": 2000 }, { "epoch": 0.6851661527920521, "eval_accuracy": 0.7543706293706294, "eval_f1": 0.745514070883235, "eval_loss": 0.7486010193824768, "eval_precision": 0.7586667809302097, "eval_recall": 0.7543706293706294, "eval_runtime": 154.7216, "eval_samples_per_second": 14.788, "eval_steps_per_second": 0.465, "step": 2000 }, { "epoch": 0.7708119218910586, "grad_norm": 4.490630626678467, "learning_rate": 4.229188078108942e-05, "loss": 0.3916, "step": 2250 }, { "epoch": 0.7708119218910586, "eval_accuracy": 0.7539335664335665, "eval_f1": 0.7428581792552783, "eval_loss": 0.6880093812942505, "eval_precision": 0.7522960627196311, "eval_recall": 0.7539335664335665, "eval_runtime": 154.2223, "eval_samples_per_second": 14.836, "eval_steps_per_second": 0.467, "step": 2250 }, { "epoch": 0.8564576909900651, "grad_norm": 2.426281452178955, "learning_rate": 4.143542309009935e-05, "loss": 0.3835, "step": 2500 }, { "epoch": 0.8564576909900651, "eval_accuracy": 0.7556818181818182, "eval_f1": 0.7470266895815383, "eval_loss": 0.7790956497192383, "eval_precision": 0.7554129003944599, "eval_recall": 0.7556818181818182, "eval_runtime": 158.6172, "eval_samples_per_second": 14.425, "eval_steps_per_second": 0.454, "step": 2500 }, { "epoch": 0.9421034600890716, "grad_norm": 2.668764591217041, "learning_rate": 4.0578965399109283e-05, "loss": 0.3744, "step": 2750 }, { "epoch": 0.9421034600890716, "eval_accuracy": 0.7552447552447552, "eval_f1": 0.7477654252537256, "eval_loss": 0.7511053085327148, "eval_precision": 0.7591641056843698, "eval_recall": 0.7552447552447552, "eval_runtime": 158.6161, "eval_samples_per_second": 14.425, "eval_steps_per_second": 0.454, "step": 2750 }, { "epoch": 1.027749229188078, "grad_norm": 4.129087448120117, "learning_rate": 3.972250770811922e-05, "loss": 0.3902, "step": 3000 }, { "epoch": 1.027749229188078, "eval_accuracy": 0.7683566433566433, "eval_f1": 0.7647199540664621, "eval_loss": 0.6985490918159485, "eval_precision": 0.7701853452045943, "eval_recall": 0.7683566433566433, "eval_runtime": 150.2872, "eval_samples_per_second": 15.224, "eval_steps_per_second": 0.479, "step": 3000 } ], "logging_steps": 250, "max_steps": 14595, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 250, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 9.7057888093675e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }