|
{ |
|
"best_metric": 0.959878879636639, |
|
"best_model_checkpoint": "trained_models/CoNLL/checkpoint-10000", |
|
"epoch": 0.7121492664862555, |
|
"global_step": 10000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.821962683378436e-05, |
|
"loss": 0.166, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy_score": 0.9851641291226977, |
|
"eval_f1": 0.9190357439733999, |
|
"eval_loss": 0.08048456907272339, |
|
"eval_precision": 0.9080157687253614, |
|
"eval_recall": 0.9303264893975093, |
|
"eval_runtime": 27.4476, |
|
"eval_samples_per_second": 118.517, |
|
"eval_steps_per_second": 14.828, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.643925366756873e-05, |
|
"loss": 0.0586, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy_score": 0.9874810170943499, |
|
"eval_f1": 0.9253980161707094, |
|
"eval_loss": 0.061091016978025436, |
|
"eval_precision": 0.9167630057803469, |
|
"eval_recall": 0.9341972399865365, |
|
"eval_runtime": 26.4887, |
|
"eval_samples_per_second": 122.807, |
|
"eval_steps_per_second": 15.365, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.465888050135308e-05, |
|
"loss": 0.0549, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy_score": 0.9892138156613839, |
|
"eval_f1": 0.9333000665335994, |
|
"eval_loss": 0.051654551178216934, |
|
"eval_precision": 0.9225583689575797, |
|
"eval_recall": 0.9442948502187816, |
|
"eval_runtime": 27.064, |
|
"eval_samples_per_second": 120.197, |
|
"eval_steps_per_second": 15.038, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.287850733513745e-05, |
|
"loss": 0.0408, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy_score": 0.9887660137845099, |
|
"eval_f1": 0.9369867605161722, |
|
"eval_loss": 0.05154793709516525, |
|
"eval_precision": 0.9330774365821095, |
|
"eval_recall": 0.9409289801413665, |
|
"eval_runtime": 34.9374, |
|
"eval_samples_per_second": 93.109, |
|
"eval_steps_per_second": 11.649, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.109813416892181e-05, |
|
"loss": 0.0313, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy_score": 0.9901094194151319, |
|
"eval_f1": 0.9437919463087249, |
|
"eval_loss": 0.049433596432209015, |
|
"eval_precision": 0.9409501505520241, |
|
"eval_recall": 0.946650959272972, |
|
"eval_runtime": 34.757, |
|
"eval_samples_per_second": 93.593, |
|
"eval_steps_per_second": 11.71, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.9317761002706174e-05, |
|
"loss": 0.0296, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy_score": 0.9906740391729294, |
|
"eval_f1": 0.9458756398422422, |
|
"eval_loss": 0.053381938487291336, |
|
"eval_precision": 0.9432635983263599, |
|
"eval_recall": 0.9485021878155503, |
|
"eval_runtime": 36.167, |
|
"eval_samples_per_second": 89.944, |
|
"eval_steps_per_second": 11.253, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.7537387836490526e-05, |
|
"loss": 0.0293, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy_score": 0.9910634321093416, |
|
"eval_f1": 0.9481096487551345, |
|
"eval_loss": 0.042416807264089584, |
|
"eval_precision": 0.9445465174544847, |
|
"eval_recall": 0.9516997643890945, |
|
"eval_runtime": 34.7402, |
|
"eval_samples_per_second": 93.638, |
|
"eval_steps_per_second": 11.716, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.575701467027489e-05, |
|
"loss": 0.0226, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy_score": 0.9903041158833379, |
|
"eval_f1": 0.9465661641541039, |
|
"eval_loss": 0.04618750512599945, |
|
"eval_precision": 0.9421473824608203, |
|
"eval_recall": 0.9510265903736116, |
|
"eval_runtime": 28.4523, |
|
"eval_samples_per_second": 114.332, |
|
"eval_steps_per_second": 14.305, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.397664150405925e-05, |
|
"loss": 0.0207, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy_score": 0.9913944161052919, |
|
"eval_f1": 0.9520862948407058, |
|
"eval_loss": 0.04386541247367859, |
|
"eval_precision": 0.9461525677247797, |
|
"eval_recall": 0.9580949175361831, |
|
"eval_runtime": 25.9577, |
|
"eval_samples_per_second": 125.319, |
|
"eval_steps_per_second": 15.679, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.219626833784362e-05, |
|
"loss": 0.0166, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy_score": 0.9914528250457537, |
|
"eval_f1": 0.9523649781952366, |
|
"eval_loss": 0.044772420078516006, |
|
"eval_precision": 0.949180875961217, |
|
"eval_recall": 0.9555705149781218, |
|
"eval_runtime": 28.1693, |
|
"eval_samples_per_second": 115.48, |
|
"eval_steps_per_second": 14.448, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.041589517162797e-05, |
|
"loss": 0.0159, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy_score": 0.9914333553989331, |
|
"eval_f1": 0.9543924604510265, |
|
"eval_loss": 0.046620924025774, |
|
"eval_precision": 0.9543924604510265, |
|
"eval_recall": 0.9543924604510265, |
|
"eval_runtime": 27.5426, |
|
"eval_samples_per_second": 118.108, |
|
"eval_steps_per_second": 14.777, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8635522005412335e-05, |
|
"loss": 0.0124, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy_score": 0.9917254001012422, |
|
"eval_f1": 0.9533227185116904, |
|
"eval_loss": 0.04490247741341591, |
|
"eval_precision": 0.9494241362043064, |
|
"eval_recall": 0.9572534500168294, |
|
"eval_runtime": 22.1337, |
|
"eval_samples_per_second": 146.971, |
|
"eval_steps_per_second": 18.388, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.6855148839196698e-05, |
|
"loss": 0.0104, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy_score": 0.9911607803434446, |
|
"eval_f1": 0.9488426702448842, |
|
"eval_loss": 0.04854311794042587, |
|
"eval_precision": 0.9456703443664326, |
|
"eval_recall": 0.9520363513968361, |
|
"eval_runtime": 22.2143, |
|
"eval_samples_per_second": 146.437, |
|
"eval_steps_per_second": 18.322, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.507477567298106e-05, |
|
"loss": 0.0116, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy_score": 0.9924652466804252, |
|
"eval_f1": 0.958029655692385, |
|
"eval_loss": 0.04380907490849495, |
|
"eval_precision": 0.9537948290241868, |
|
"eval_recall": 0.9623022551329519, |
|
"eval_runtime": 22.1806, |
|
"eval_samples_per_second": 146.659, |
|
"eval_steps_per_second": 18.349, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.329440250676542e-05, |
|
"loss": 0.008, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy_score": 0.9924652466804252, |
|
"eval_f1": 0.9584347972121924, |
|
"eval_loss": 0.046966083347797394, |
|
"eval_precision": 0.9564270152505446, |
|
"eval_recall": 0.9604510265903736, |
|
"eval_runtime": 22.1907, |
|
"eval_samples_per_second": 146.593, |
|
"eval_steps_per_second": 18.341, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.151402934054978e-05, |
|
"loss": 0.0106, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy_score": 0.9920758537440131, |
|
"eval_f1": 0.9552964857837792, |
|
"eval_loss": 0.04950818791985512, |
|
"eval_precision": 0.9521819093797024, |
|
"eval_recall": 0.9584315045439246, |
|
"eval_runtime": 22.1428, |
|
"eval_samples_per_second": 146.91, |
|
"eval_steps_per_second": 18.381, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9733656174334143e-05, |
|
"loss": 0.0089, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy_score": 0.9921732019781161, |
|
"eval_f1": 0.9558589496607757, |
|
"eval_loss": 0.04179555922746658, |
|
"eval_precision": 0.9514757378689345, |
|
"eval_recall": 0.9602827330865029, |
|
"eval_runtime": 22.1266, |
|
"eval_samples_per_second": 147.018, |
|
"eval_steps_per_second": 18.394, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7953283008118502e-05, |
|
"loss": 0.0067, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy_score": 0.9919979751567306, |
|
"eval_f1": 0.9558872861455887, |
|
"eval_loss": 0.049222081899642944, |
|
"eval_precision": 0.9526914075560013, |
|
"eval_recall": 0.9591046785594076, |
|
"eval_runtime": 22.1688, |
|
"eval_samples_per_second": 146.738, |
|
"eval_steps_per_second": 18.359, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.6172909841902865e-05, |
|
"loss": 0.0054, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy_score": 0.9919979751567306, |
|
"eval_f1": 0.9572004028197382, |
|
"eval_loss": 0.0546395517885685, |
|
"eval_precision": 0.9546367592902578, |
|
"eval_recall": 0.9597778525748906, |
|
"eval_runtime": 23.9981, |
|
"eval_samples_per_second": 135.552, |
|
"eval_steps_per_second": 16.96, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.4392536675687223e-05, |
|
"loss": 0.0055, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy_score": 0.9924068377399634, |
|
"eval_f1": 0.959878879636639, |
|
"eval_loss": 0.05072605982422829, |
|
"eval_precision": 0.9594753657306205, |
|
"eval_recall": 0.9602827330865029, |
|
"eval_runtime": 22.125, |
|
"eval_samples_per_second": 147.028, |
|
"eval_steps_per_second": 18.396, |
|
"step": 10000 |
|
} |
|
], |
|
"max_steps": 14042, |
|
"num_train_epochs": 1, |
|
"total_flos": 5226265866240000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|