{
  "best_metric": 0.983181371856266,
  "best_model_checkpoint": "/home3/s5431786/nlp-final-project/results/roberta-large-e-snli-classification-nli_explanation-base-b16/checkpoint-2800",
  "epoch": 0.081547064305685,
  "global_step": 2800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 7.765482430596001e-07,
      "loss": 1.0726,
      "step": 400
    },
    {
      "epoch": 0.01,
      "eval_accuracy": 0.7982117455801666,
      "eval_f1": 0.7974911320075254,
      "eval_loss": 0.8326770067214966,
      "eval_runtime": 12.8669,
      "eval_samples_per_second": 764.91,
      "eval_steps_per_second": 47.875,
      "step": 400
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.5530964861192002e-06,
      "loss": 0.3233,
      "step": 800
    },
    {
      "epoch": 0.02,
      "eval_accuracy": 0.9642349116033326,
      "eval_f1": 0.9642718853707861,
      "eval_loss": 0.14329373836517334,
      "eval_runtime": 12.933,
      "eval_samples_per_second": 760.998,
      "eval_steps_per_second": 47.63,
      "step": 800
    },
    {
      "epoch": 0.03,
      "learning_rate": 2.3296447291788007e-06,
      "loss": 0.173,
      "step": 1200
    },
    {
      "epoch": 0.03,
      "eval_accuracy": 0.9762243446453973,
      "eval_f1": 0.9761597449883214,
      "eval_loss": 0.1054357960820198,
      "eval_runtime": 12.9969,
      "eval_samples_per_second": 757.258,
      "eval_steps_per_second": 47.396,
      "step": 1200
    },
    {
      "epoch": 0.05,
      "learning_rate": 3.1061929722384003e-06,
      "loss": 0.1452,
      "step": 1600
    },
    {
      "epoch": 0.05,
      "eval_accuracy": 0.9799837431416378,
      "eval_f1": 0.9799316333368723,
      "eval_loss": 0.09648650884628296,
      "eval_runtime": 13.0065,
      "eval_samples_per_second": 756.7,
      "eval_steps_per_second": 47.361,
      "step": 1600
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.882741215298001e-06,
      "loss": 0.1449,
      "step": 2000
    },
    {
      "epoch": 0.06,
      "eval_accuracy": 0.9813046128835603,
      "eval_f1": 0.9812341372941731,
      "eval_loss": 0.09331633150577545,
      "eval_runtime": 12.921,
      "eval_samples_per_second": 761.706,
      "eval_steps_per_second": 47.674,
      "step": 2000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.659289458357601e-06,
      "loss": 0.1303,
      "step": 2400
    },
    {
      "epoch": 0.07,
      "eval_accuracy": 0.9815078236130867,
      "eval_f1": 0.9814520761782233,
      "eval_loss": 0.10890379548072815,
      "eval_runtime": 12.8683,
      "eval_samples_per_second": 764.824,
      "eval_steps_per_second": 47.87,
      "step": 2400
    },
    {
      "epoch": 0.08,
      "learning_rate": 5.435837701417202e-06,
      "loss": 0.1372,
      "step": 2800
    },
    {
      "epoch": 0.08,
      "eval_accuracy": 0.9832351148140622,
      "eval_f1": 0.983181371856266,
      "eval_loss": 0.07784133404493332,
      "eval_runtime": 12.8322,
      "eval_samples_per_second": 766.977,
      "eval_steps_per_second": 48.004,
      "step": 2800
    }
  ],
  "max_steps": 103008,
  "num_train_epochs": 3,
  "total_flos": 5492559907900896.0,
  "trial_name": null,
  "trial_params": null
}
|