|
{ |
|
"best_metric": 0.8242820390801596, |
|
"best_model_checkpoint": "turkish_multilabel_intent_bert-base-turkish-128k-uncased/checkpoint-384", |
|
"epoch": 2.887218045112782, |
|
"global_step": 384, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.7987362585716124e-05, |
|
"loss": 0.0139, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 0.009882427752017975, |
|
"eval_macro f1": 0.3392960248919277, |
|
"eval_micro f1": 0.7193911317008603, |
|
"eval_runtime": 2.3212, |
|
"eval_samples_per_second": 461.408, |
|
"eval_steps_per_second": 7.324, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.491617138023029e-05, |
|
"loss": 0.0082, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.00571652315557003, |
|
"eval_macro f1": 0.5888766985486856, |
|
"eval_micro f1": 0.8291472868217056, |
|
"eval_runtime": 2.3143, |
|
"eval_samples_per_second": 462.783, |
|
"eval_steps_per_second": 7.346, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.184498017474446e-05, |
|
"loss": 0.0062, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 0.004936754237860441, |
|
"eval_macro f1": 0.6479513036422692, |
|
"eval_micro f1": 0.8529040404040404, |
|
"eval_runtime": 2.335, |
|
"eval_samples_per_second": 458.676, |
|
"eval_steps_per_second": 7.281, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.877378896925863e-05, |
|
"loss": 0.0046, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 0.0039069755002856255, |
|
"eval_macro f1": 0.7036941962393284, |
|
"eval_micro f1": 0.8735053492762743, |
|
"eval_runtime": 2.3196, |
|
"eval_samples_per_second": 461.727, |
|
"eval_steps_per_second": 7.329, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.57025977637728e-05, |
|
"loss": 0.0035, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 0.003501879284158349, |
|
"eval_macro f1": 0.7315796800388726, |
|
"eval_micro f1": 0.8865718799368089, |
|
"eval_runtime": 2.316, |
|
"eval_samples_per_second": 462.435, |
|
"eval_steps_per_second": 7.34, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.263140655828696e-05, |
|
"loss": 0.0037, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 0.0034342026337981224, |
|
"eval_macro f1": 0.7462213705474623, |
|
"eval_micro f1": 0.8902361199744736, |
|
"eval_runtime": 2.392, |
|
"eval_samples_per_second": 447.74, |
|
"eval_steps_per_second": 7.107, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.956021535280113e-05, |
|
"loss": 0.0032, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 0.0032742032781243324, |
|
"eval_macro f1": 0.7279673266730304, |
|
"eval_micro f1": 0.8998085513720486, |
|
"eval_runtime": 2.3105, |
|
"eval_samples_per_second": 463.546, |
|
"eval_steps_per_second": 7.358, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.64890241473153e-05, |
|
"loss": 0.0033, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 0.003130522323772311, |
|
"eval_macro f1": 0.7717739019762423, |
|
"eval_micro f1": 0.912258064516129, |
|
"eval_runtime": 2.3, |
|
"eval_samples_per_second": 465.662, |
|
"eval_steps_per_second": 7.391, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.3417832941829465e-05, |
|
"loss": 0.0025, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_loss": 0.0031702849082648754, |
|
"eval_macro f1": 0.7845634595933292, |
|
"eval_micro f1": 0.9168036829990136, |
|
"eval_runtime": 2.3461, |
|
"eval_samples_per_second": 456.499, |
|
"eval_steps_per_second": 7.246, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.0346641736343635e-05, |
|
"loss": 0.0022, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_loss": 0.0030924060847610235, |
|
"eval_macro f1": 0.7838887334413882, |
|
"eval_micro f1": 0.9131561892417368, |
|
"eval_runtime": 2.2918, |
|
"eval_samples_per_second": 467.308, |
|
"eval_steps_per_second": 7.418, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.7275450530857802e-05, |
|
"loss": 0.0022, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_loss": 0.0030099288560450077, |
|
"eval_macro f1": 0.7712848231573721, |
|
"eval_micro f1": 0.908565928777671, |
|
"eval_runtime": 2.3124, |
|
"eval_samples_per_second": 463.146, |
|
"eval_steps_per_second": 7.352, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.420425932537197e-05, |
|
"loss": 0.0022, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 0.0029520909301936626, |
|
"eval_macro f1": 0.8242820390801596, |
|
"eval_micro f1": 0.913988964621876, |
|
"eval_runtime": 2.3289, |
|
"eval_samples_per_second": 459.872, |
|
"eval_steps_per_second": 7.3, |
|
"step": 384 |
|
} |
|
], |
|
"max_steps": 532, |
|
"num_train_epochs": 4, |
|
"total_flos": 630598146840000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|