|
{ |
|
"best_metric": 0.8648728562980484, |
|
"best_model_checkpoint": "./models/entities/test_v4/checkpoint-3500", |
|
"epoch": 9.979267449896337, |
|
"eval_steps": 500, |
|
"global_step": 3610, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.38217000691085, |
|
"grad_norm": 0.9774827361106873, |
|
"learning_rate": 0.0002, |
|
"loss": 0.5764, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.38217000691085, |
|
"eval_accuracy": 0.9593812772012053, |
|
"eval_f1": 0.7594292979234355, |
|
"eval_loss": 0.1411387324333191, |
|
"eval_precision": 0.708673872923807, |
|
"eval_recall": 0.8180158247108947, |
|
"eval_runtime": 199.3049, |
|
"eval_samples_per_second": 24.886, |
|
"eval_steps_per_second": 24.886, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.7643400138217, |
|
"grad_norm": 0.7694200873374939, |
|
"learning_rate": 0.00016784565916398716, |
|
"loss": 0.09, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.7643400138217, |
|
"eval_accuracy": 0.9687447689989956, |
|
"eval_f1": 0.8259538274605103, |
|
"eval_loss": 0.11496158689260483, |
|
"eval_precision": 0.792852477372399, |
|
"eval_recall": 0.8619395414891459, |
|
"eval_runtime": 48.3856, |
|
"eval_samples_per_second": 102.51, |
|
"eval_steps_per_second": 102.51, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.14651002073255, |
|
"grad_norm": 0.625320315361023, |
|
"learning_rate": 0.0001356913183279743, |
|
"loss": 0.0463, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.14651002073255, |
|
"eval_accuracy": 0.9703716103113492, |
|
"eval_f1": 0.8380395025603511, |
|
"eval_loss": 0.13039253652095795, |
|
"eval_precision": 0.8069878839109609, |
|
"eval_recall": 0.8715763846622033, |
|
"eval_runtime": 46.9044, |
|
"eval_samples_per_second": 105.747, |
|
"eval_steps_per_second": 105.747, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.5286800276434, |
|
"grad_norm": 0.4629702866077423, |
|
"learning_rate": 0.00010353697749196143, |
|
"loss": 0.023, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.5286800276434, |
|
"eval_accuracy": 0.9716950535654503, |
|
"eval_f1": 0.8417248620403379, |
|
"eval_loss": 0.1349513679742813, |
|
"eval_precision": 0.8115641774178359, |
|
"eval_recall": 0.8742138364779874, |
|
"eval_runtime": 47.4889, |
|
"eval_samples_per_second": 104.446, |
|
"eval_steps_per_second": 104.446, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 6.91085003455425, |
|
"grad_norm": 0.1776706427335739, |
|
"learning_rate": 7.138263665594856e-05, |
|
"loss": 0.0147, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 6.91085003455425, |
|
"eval_accuracy": 0.9736305239370606, |
|
"eval_f1": 0.8591340450771056, |
|
"eval_loss": 0.14083334803581238, |
|
"eval_precision": 0.8377674956622325, |
|
"eval_recall": 0.8816189896530736, |
|
"eval_runtime": 47.4474, |
|
"eval_samples_per_second": 104.537, |
|
"eval_steps_per_second": 104.537, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.2930200414651, |
|
"grad_norm": 0.7263190746307373, |
|
"learning_rate": 3.92282958199357e-05, |
|
"loss": 0.0078, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.2930200414651, |
|
"eval_accuracy": 0.9740594660194175, |
|
"eval_f1": 0.8621834580504485, |
|
"eval_loss": 0.14879639446735382, |
|
"eval_precision": 0.8428446855924814, |
|
"eval_recall": 0.8824305132886995, |
|
"eval_runtime": 49.4794, |
|
"eval_samples_per_second": 100.244, |
|
"eval_steps_per_second": 100.244, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 9.675190048375951, |
|
"grad_norm": 0.21800673007965088, |
|
"learning_rate": 7.07395498392283e-06, |
|
"loss": 0.0045, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.675190048375951, |
|
"eval_accuracy": 0.9743680950786743, |
|
"eval_f1": 0.8648728562980484, |
|
"eval_loss": 0.16495844721794128, |
|
"eval_precision": 0.8410005750431282, |
|
"eval_recall": 0.8901399878271454, |
|
"eval_runtime": 46.0436, |
|
"eval_samples_per_second": 107.724, |
|
"eval_steps_per_second": 107.724, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 9.979267449896337, |
|
"step": 3610, |
|
"total_flos": 6112468258365600.0, |
|
"train_loss": 0.10573644003214268, |
|
"train_runtime": 1226.1602, |
|
"train_samples_per_second": 188.784, |
|
"train_steps_per_second": 2.944 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3610, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6112468258365600.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|