|
{
  "best_metric": 0.8090804377039739,
  "best_model_checkpoint": "./models/entities/test_v6/checkpoint-7000",
  "epoch": 19.997926744989634,
  "eval_steps": 7000,
  "global_step": 7220,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.3842432619212164,
      "grad_norm": 7.1875,
      "learning_rate": 0.0002,
      "loss": 8.369,
      "step": 500
    },
    {
      "epoch": 2.7684865238424328,
      "grad_norm": 4.84375,
      "learning_rate": 0.00018511904761904765,
      "loss": 1.059,
      "step": 1000
    },
    {
      "epoch": 4.1548030407740155,
      "grad_norm": 5.28125,
      "learning_rate": 0.00017023809523809523,
      "loss": 0.6034,
      "step": 1500
    },
    {
      "epoch": 5.5390463026952315,
      "grad_norm": 3.90625,
      "learning_rate": 0.00015535714285714287,
      "loss": 0.3745,
      "step": 2000
    },
    {
      "epoch": 6.923289564616448,
      "grad_norm": 3.515625,
      "learning_rate": 0.00014047619047619049,
      "loss": 0.2708,
      "step": 2500
    },
    {
      "epoch": 8.309606081548031,
      "grad_norm": 3.203125,
      "learning_rate": 0.0001255952380952381,
      "loss": 0.1967,
      "step": 3000
    },
    {
      "epoch": 9.693849343469246,
      "grad_norm": 2.3125,
      "learning_rate": 0.00011071428571428572,
      "loss": 0.1566,
      "step": 3500
    },
    {
      "epoch": 11.080165860400829,
      "grad_norm": 3.03125,
      "learning_rate": 9.583333333333334e-05,
      "loss": 0.1318,
      "step": 4000
    },
    {
      "epoch": 12.464409122322046,
      "grad_norm": 4.84375,
      "learning_rate": 8.095238095238096e-05,
      "loss": 0.1131,
      "step": 4500
    },
    {
      "epoch": 13.848652384243263,
      "grad_norm": 2.484375,
      "learning_rate": 6.607142857142857e-05,
      "loss": 0.1041,
      "step": 5000
    },
    {
      "epoch": 15.234968901174845,
      "grad_norm": 3.078125,
      "learning_rate": 5.119047619047619e-05,
      "loss": 0.0986,
      "step": 5500
    },
    {
      "epoch": 16.619212163096062,
      "grad_norm": 2.53125,
      "learning_rate": 3.630952380952381e-05,
      "loss": 0.0951,
      "step": 6000
    },
    {
      "epoch": 18.005528680027645,
      "grad_norm": 3.53125,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 0.0921,
      "step": 6500
    },
    {
      "epoch": 19.389771941948858,
      "grad_norm": 1.671875,
      "learning_rate": 6.547619047619048e-06,
      "loss": 0.0925,
      "step": 7000
    },
    {
      "epoch": 19.389771941948858,
      "eval_accuracy": 0.9699217442249749,
      "eval_f1": 0.8090804377039739,
      "eval_loss": 0.15359356999397278,
      "eval_precision": 0.7678083439606486,
      "eval_recall": 0.8550415905863258,
      "eval_runtime": 169.5033,
      "eval_samples_per_second": 29.262,
      "eval_steps_per_second": 29.262,
      "step": 7000
    },
    {
      "epoch": 19.997926744989634,
      "step": 7220,
      "total_flos": 6132708568035504.0,
      "train_loss": 0.8169754918592458,
      "train_runtime": 915.3849,
      "train_samples_per_second": 505.754,
      "train_steps_per_second": 7.887
    }
  ],
  "logging_steps": 500,
  "max_steps": 7220,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 7000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6132708568035504.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}