|
{ |
|
"best_metric": 0.17333222008850296, |
|
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2127", |
|
"epoch": 9.988249118683902, |
|
"eval_steps": 500, |
|
"global_step": 4250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9988249118683902, |
|
"eval_accuracy": 0.7849561289082346, |
|
"eval_f1": 0.1613083366573594, |
|
"eval_loss": 0.6610585451126099, |
|
"eval_precision": 0.08832008386476806, |
|
"eval_recall": 0.9292279411764706, |
|
"eval_runtime": 14.7703, |
|
"eval_samples_per_second": 461.06, |
|
"eval_steps_per_second": 57.683, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.1750881316098707, |
|
"grad_norm": 1.4406206607818604, |
|
"learning_rate": 4.411764705882353e-05, |
|
"loss": 0.3349, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7551182940392986, |
|
"eval_f1": 0.14506880733944952, |
|
"eval_loss": 0.9204075932502747, |
|
"eval_precision": 0.07866915422885572, |
|
"eval_recall": 0.9301470588235294, |
|
"eval_runtime": 14.7453, |
|
"eval_samples_per_second": 461.844, |
|
"eval_steps_per_second": 57.781, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.3501762632197414, |
|
"grad_norm": 2.988006591796875, |
|
"learning_rate": 3.8235294117647055e-05, |
|
"loss": 0.1788, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.99882491186839, |
|
"eval_accuracy": 0.7645035495077375, |
|
"eval_f1": 0.15487907225146869, |
|
"eval_loss": 0.9544711709022522, |
|
"eval_precision": 0.0844496214327315, |
|
"eval_recall": 0.9329044117647058, |
|
"eval_runtime": 14.8107, |
|
"eval_samples_per_second": 459.803, |
|
"eval_steps_per_second": 57.526, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 3.525264394829612, |
|
"grad_norm": 1.1645787954330444, |
|
"learning_rate": 3.235294117647059e-05, |
|
"loss": 0.1227, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7692064756203056, |
|
"eval_f1": 0.1618332674832082, |
|
"eval_loss": 1.0923608541488647, |
|
"eval_precision": 0.08852770813521224, |
|
"eval_recall": 0.9411764705882353, |
|
"eval_runtime": 14.8211, |
|
"eval_samples_per_second": 459.48, |
|
"eval_steps_per_second": 57.486, |
|
"step": 1702 |
|
}, |
|
{ |
|
"epoch": 4.700352526439483, |
|
"grad_norm": 1.1214195489883423, |
|
"learning_rate": 2.647058823529412e-05, |
|
"loss": 0.0856, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.9988249118683905, |
|
"eval_accuracy": 0.7932840841995413, |
|
"eval_f1": 0.17333222008850296, |
|
"eval_loss": 1.0502684116363525, |
|
"eval_precision": 0.09532555790247038, |
|
"eval_recall": 0.9540441176470589, |
|
"eval_runtime": 14.797, |
|
"eval_samples_per_second": 460.229, |
|
"eval_steps_per_second": 57.579, |
|
"step": 2127 |
|
}, |
|
{ |
|
"epoch": 5.875440658049354, |
|
"grad_norm": 1.1390776634216309, |
|
"learning_rate": 2.058823529411765e-05, |
|
"loss": 0.0597, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7787771018990209, |
|
"eval_f1": 0.16632958498503356, |
|
"eval_loss": 1.2641881704330444, |
|
"eval_precision": 0.0911913421449481, |
|
"eval_recall": 0.9448529411764706, |
|
"eval_runtime": 14.5753, |
|
"eval_samples_per_second": 467.228, |
|
"eval_steps_per_second": 58.455, |
|
"step": 2553 |
|
}, |
|
{ |
|
"epoch": 6.9988249118683905, |
|
"eval_accuracy": 0.7828758564817994, |
|
"eval_f1": 0.16898640903880793, |
|
"eval_loss": 1.3261910676956177, |
|
"eval_precision": 0.09275570735214812, |
|
"eval_recall": 0.9485294117647058, |
|
"eval_runtime": 14.5617, |
|
"eval_samples_per_second": 467.665, |
|
"eval_steps_per_second": 58.51, |
|
"step": 2978 |
|
}, |
|
{ |
|
"epoch": 7.050528789659224, |
|
"grad_norm": 0.5195357799530029, |
|
"learning_rate": 1.4705882352941177e-05, |
|
"loss": 0.0458, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7848943386381424, |
|
"eval_f1": 0.1687846203975236, |
|
"eval_loss": 1.3697636127471924, |
|
"eval_precision": 0.09259921344297461, |
|
"eval_recall": 0.9522058823529411, |
|
"eval_runtime": 14.6608, |
|
"eval_samples_per_second": 464.504, |
|
"eval_steps_per_second": 58.114, |
|
"step": 3404 |
|
}, |
|
{ |
|
"epoch": 8.225616921269095, |
|
"grad_norm": 0.8723571300506592, |
|
"learning_rate": 8.823529411764707e-06, |
|
"loss": 0.0343, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 8.99882491186839, |
|
"eval_accuracy": 0.782223625853049, |
|
"eval_f1": 0.165499476776946, |
|
"eval_loss": 1.4433350563049316, |
|
"eval_precision": 0.09069254521393913, |
|
"eval_recall": 0.9448529411764706, |
|
"eval_runtime": 15.0504, |
|
"eval_samples_per_second": 452.478, |
|
"eval_steps_per_second": 56.61, |
|
"step": 3829 |
|
}, |
|
{ |
|
"epoch": 9.400705052878966, |
|
"grad_norm": 0.6075822710990906, |
|
"learning_rate": 2.9411764705882355e-06, |
|
"loss": 0.0292, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 9.988249118683902, |
|
"eval_accuracy": 0.7820794485561674, |
|
"eval_f1": 0.16674769081186194, |
|
"eval_loss": 1.4861844778060913, |
|
"eval_precision": 0.0914341567442687, |
|
"eval_recall": 0.9457720588235294, |
|
"eval_runtime": 15.3988, |
|
"eval_samples_per_second": 442.243, |
|
"eval_steps_per_second": 55.329, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 9.988249118683902, |
|
"step": 4250, |
|
"total_flos": 1.2649810588547778e+16, |
|
"train_loss": 0.10639642311544979, |
|
"train_runtime": 1208.2019, |
|
"train_samples_per_second": 225.368, |
|
"train_steps_per_second": 3.518 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 4250, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2649810588547778e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|