{ "best_metric": 0.17333222008850296, "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2127", "epoch": 9.988249118683902, "eval_steps": 500, "global_step": 4250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9988249118683902, "eval_accuracy": 0.7849561289082346, "eval_f1": 0.1613083366573594, "eval_loss": 0.6610585451126099, "eval_precision": 0.08832008386476806, "eval_recall": 0.9292279411764706, "eval_runtime": 14.7703, "eval_samples_per_second": 461.06, "eval_steps_per_second": 57.683, "step": 425 }, { "epoch": 1.1750881316098707, "grad_norm": 1.4406206607818604, "learning_rate": 4.411764705882353e-05, "loss": 0.3349, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.7551182940392986, "eval_f1": 0.14506880733944952, "eval_loss": 0.9204075932502747, "eval_precision": 0.07866915422885572, "eval_recall": 0.9301470588235294, "eval_runtime": 14.7453, "eval_samples_per_second": 461.844, "eval_steps_per_second": 57.781, "step": 851 }, { "epoch": 2.3501762632197414, "grad_norm": 2.988006591796875, "learning_rate": 3.8235294117647055e-05, "loss": 0.1788, "step": 1000 }, { "epoch": 2.99882491186839, "eval_accuracy": 0.7645035495077375, "eval_f1": 0.15487907225146869, "eval_loss": 0.9544711709022522, "eval_precision": 0.0844496214327315, "eval_recall": 0.9329044117647058, "eval_runtime": 14.8107, "eval_samples_per_second": 459.803, "eval_steps_per_second": 57.526, "step": 1276 }, { "epoch": 3.525264394829612, "grad_norm": 1.1645787954330444, "learning_rate": 3.235294117647059e-05, "loss": 0.1227, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.7692064756203056, "eval_f1": 0.1618332674832082, "eval_loss": 1.0923608541488647, "eval_precision": 0.08852770813521224, "eval_recall": 0.9411764705882353, "eval_runtime": 14.8211, "eval_samples_per_second": 459.48, "eval_steps_per_second": 57.486, "step": 1702 }, { "epoch": 4.700352526439483, "grad_norm": 1.1214195489883423, "learning_rate": 2.647058823529412e-05, "loss": 0.0856, "step": 2000 }, { "epoch": 4.9988249118683905, "eval_accuracy": 0.7932840841995413, "eval_f1": 0.17333222008850296, "eval_loss": 1.0502684116363525, "eval_precision": 0.09532555790247038, "eval_recall": 0.9540441176470589, "eval_runtime": 14.797, "eval_samples_per_second": 460.229, "eval_steps_per_second": 57.579, "step": 2127 }, { "epoch": 5.875440658049354, "grad_norm": 1.1390776634216309, "learning_rate": 2.058823529411765e-05, "loss": 0.0597, "step": 2500 }, { "epoch": 6.0, "eval_accuracy": 0.7787771018990209, "eval_f1": 0.16632958498503356, "eval_loss": 1.2641881704330444, "eval_precision": 0.0911913421449481, "eval_recall": 0.9448529411764706, "eval_runtime": 14.5753, "eval_samples_per_second": 467.228, "eval_steps_per_second": 58.455, "step": 2553 }, { "epoch": 6.9988249118683905, "eval_accuracy": 0.7828758564817994, "eval_f1": 0.16898640903880793, "eval_loss": 1.3261910676956177, "eval_precision": 0.09275570735214812, "eval_recall": 0.9485294117647058, "eval_runtime": 14.5617, "eval_samples_per_second": 467.665, "eval_steps_per_second": 58.51, "step": 2978 }, { "epoch": 7.050528789659224, "grad_norm": 0.5195357799530029, "learning_rate": 1.4705882352941177e-05, "loss": 0.0458, "step": 3000 }, { "epoch": 8.0, "eval_accuracy": 0.7848943386381424, "eval_f1": 0.1687846203975236, "eval_loss": 1.3697636127471924, "eval_precision": 0.09259921344297461, "eval_recall": 0.9522058823529411, "eval_runtime": 14.6608, "eval_samples_per_second": 464.504, "eval_steps_per_second": 58.114, "step": 3404 }, { "epoch": 8.225616921269095, "grad_norm": 0.8723571300506592, "learning_rate": 8.823529411764707e-06, "loss": 0.0343, "step": 3500 }, { "epoch": 8.99882491186839, "eval_accuracy": 0.782223625853049, "eval_f1": 0.165499476776946, "eval_loss": 1.4433350563049316, "eval_precision": 0.09069254521393913, "eval_recall": 0.9448529411764706, "eval_runtime": 15.0504, "eval_samples_per_second": 452.478, "eval_steps_per_second": 56.61, "step": 3829 }, { "epoch": 9.400705052878966, "grad_norm": 0.6075822710990906, "learning_rate": 2.9411764705882355e-06, "loss": 0.0292, "step": 4000 }, { "epoch": 9.988249118683902, "eval_accuracy": 0.7820794485561674, "eval_f1": 0.16674769081186194, "eval_loss": 1.4861844778060913, "eval_precision": 0.0914341567442687, "eval_recall": 0.9457720588235294, "eval_runtime": 15.3988, "eval_samples_per_second": 442.243, "eval_steps_per_second": 55.329, "step": 4250 }, { "epoch": 9.988249118683902, "step": 4250, "total_flos": 1.2649810588547778e+16, "train_loss": 0.10639642311544979, "train_runtime": 1208.2019, "train_samples_per_second": 225.368, "train_steps_per_second": 3.518 } ], "logging_steps": 500, "max_steps": 4250, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2649810588547778e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }