|
{ |
|
"best_metric": 0.05716780200600624, |
|
"best_model_checkpoint": "/data/jcanete/all_results/pos/albeto_xxlarge/epochs_4_bs_16_lr_5e-6/checkpoint-1800", |
|
"epoch": 4.0, |
|
"global_step": 3580, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.9740770234323376, |
|
"eval_f1": 0.9703136222169021, |
|
"eval_loss": 0.09671590477228165, |
|
"eval_precision": 0.9689025240552224, |
|
"eval_recall": 0.9717288365866816, |
|
"eval_runtime": 12.1146, |
|
"eval_samples_per_second": 136.529, |
|
"eval_steps_per_second": 8.585, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.978109486453974, |
|
"eval_f1": 0.9763370891925068, |
|
"eval_loss": 0.08312373608350754, |
|
"eval_precision": 0.9753928372018824, |
|
"eval_recall": 0.9772831711654113, |
|
"eval_runtime": 12.0635, |
|
"eval_samples_per_second": 137.107, |
|
"eval_steps_per_second": 8.621, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.305865921787709e-06, |
|
"loss": 0.2346, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.9802951491841887, |
|
"eval_f1": 0.9781281181400918, |
|
"eval_loss": 0.06705235689878464, |
|
"eval_precision": 0.9769778149853495, |
|
"eval_recall": 0.979281133244091, |
|
"eval_runtime": 12.0607, |
|
"eval_samples_per_second": 137.14, |
|
"eval_steps_per_second": 8.623, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.9816167126954812, |
|
"eval_f1": 0.980212056468521, |
|
"eval_loss": 0.06424280256032944, |
|
"eval_precision": 0.9796252319849933, |
|
"eval_recall": 0.9807995844238876, |
|
"eval_runtime": 12.0651, |
|
"eval_samples_per_second": 137.09, |
|
"eval_steps_per_second": 8.62, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.607541899441341e-06, |
|
"loss": 0.0581, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.9821419494756104, |
|
"eval_f1": 0.9813241767565355, |
|
"eval_loss": 0.06397537887096405, |
|
"eval_precision": 0.9805313971116253, |
|
"eval_recall": 0.9821182393958162, |
|
"eval_runtime": 12.0587, |
|
"eval_samples_per_second": 137.163, |
|
"eval_steps_per_second": 8.624, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.982734958743498, |
|
"eval_f1": 0.9817945902784708, |
|
"eval_loss": 0.06401154398918152, |
|
"eval_precision": 0.9809330062426455, |
|
"eval_recall": 0.9826576891570598, |
|
"eval_runtime": 12.05, |
|
"eval_samples_per_second": 137.262, |
|
"eval_steps_per_second": 8.631, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.9834635129869029, |
|
"eval_f1": 0.982266245955985, |
|
"eval_loss": 0.05947747826576233, |
|
"eval_precision": 0.9818152783599816, |
|
"eval_recall": 0.9827176280194202, |
|
"eval_runtime": 12.0881, |
|
"eval_samples_per_second": 136.829, |
|
"eval_steps_per_second": 8.604, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.9092178770949727e-06, |
|
"loss": 0.04, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.9830229918164721, |
|
"eval_f1": 0.982217698479224, |
|
"eval_loss": 0.05940423533320427, |
|
"eval_precision": 0.9811407268595124, |
|
"eval_recall": 0.9832970370222374, |
|
"eval_runtime": 12.0517, |
|
"eval_samples_per_second": 137.242, |
|
"eval_steps_per_second": 8.629, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_accuracy": 0.9839718066450924, |
|
"eval_f1": 0.9833950734391069, |
|
"eval_loss": 0.05716780200600624, |
|
"eval_precision": 0.9829141716566866, |
|
"eval_recall": 0.9838764460250544, |
|
"eval_runtime": 12.0638, |
|
"eval_samples_per_second": 137.105, |
|
"eval_steps_per_second": 8.621, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.2108938547486037e-06, |
|
"loss": 0.0327, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_accuracy": 0.983700716694058, |
|
"eval_f1": 0.9826579207475964, |
|
"eval_loss": 0.06271136552095413, |
|
"eval_precision": 0.9820794252644183, |
|
"eval_recall": 0.9832370981598769, |
|
"eval_runtime": 12.0524, |
|
"eval_samples_per_second": 137.234, |
|
"eval_steps_per_second": 8.629, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_accuracy": 0.9840734653767303, |
|
"eval_f1": 0.9836376523674989, |
|
"eval_loss": 0.06178496032953262, |
|
"eval_precision": 0.982980166806337, |
|
"eval_recall": 0.9842960180615772, |
|
"eval_runtime": 12.0504, |
|
"eval_samples_per_second": 137.257, |
|
"eval_steps_per_second": 8.63, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.9839887497670321, |
|
"eval_f1": 0.983732286767129, |
|
"eval_loss": 0.0652877539396286, |
|
"eval_precision": 0.9833885716567504, |
|
"eval_recall": 0.9840762422329224, |
|
"eval_runtime": 12.0582, |
|
"eval_samples_per_second": 137.168, |
|
"eval_steps_per_second": 8.625, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.5125698324022347e-06, |
|
"loss": 0.022, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.9843445553277647, |
|
"eval_f1": 0.9838023527532006, |
|
"eval_loss": 0.061138641089200974, |
|
"eval_precision": 0.9834487990895842, |
|
"eval_recall": 0.9841561607160696, |
|
"eval_runtime": 12.0686, |
|
"eval_samples_per_second": 137.05, |
|
"eval_steps_per_second": 8.617, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_accuracy": 0.984378441571644, |
|
"eval_f1": 0.9841013042523019, |
|
"eval_loss": 0.06497478485107422, |
|
"eval_precision": 0.9837869137232195, |
|
"eval_recall": 0.984415895786298, |
|
"eval_runtime": 12.0336, |
|
"eval_samples_per_second": 137.449, |
|
"eval_steps_per_second": 8.642, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 8.142458100558661e-07, |
|
"loss": 0.0161, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_accuracy": 0.9844631571813423, |
|
"eval_f1": 0.9838933546357782, |
|
"eval_loss": 0.06659159064292908, |
|
"eval_precision": 0.9834710743801653, |
|
"eval_recall": 0.984315997682364, |
|
"eval_runtime": 12.058, |
|
"eval_samples_per_second": 137.17, |
|
"eval_steps_per_second": 8.625, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"eval_accuracy": 0.9840056928889718, |
|
"eval_f1": 0.983372447196285, |
|
"eval_loss": 0.06790520250797272, |
|
"eval_precision": 0.9830484785560258, |
|
"eval_recall": 0.9836966294379733, |
|
"eval_runtime": 12.0261, |
|
"eval_samples_per_second": 137.534, |
|
"eval_steps_per_second": 8.648, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_accuracy": 0.9842767828400061, |
|
"eval_f1": 0.9837931757586651, |
|
"eval_loss": 0.06859102845191956, |
|
"eval_precision": 0.9833905613670846, |
|
"eval_recall": 0.9841961199576432, |
|
"eval_runtime": 12.0273, |
|
"eval_samples_per_second": 137.521, |
|
"eval_steps_per_second": 8.647, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.1592178770949721e-07, |
|
"loss": 0.012, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 3580, |
|
"total_flos": 5364557434522752.0, |
|
"train_loss": 0.05827242064409416, |
|
"train_runtime": 5244.9021, |
|
"train_samples_per_second": 10.91, |
|
"train_steps_per_second": 0.683 |
|
} |
|
], |
|
"max_steps": 3580, |
|
"num_train_epochs": 4, |
|
"total_flos": 5364557434522752.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|