lombardata's picture
🍻 cheers
63c1043
{
"best_metric": 0.20891834795475006,
"best_model_checkpoint": "/home1/datahome/mcontini/multilabelTest/huggingface_multilabel/models/dino-large-2023_12_06-with_custom_head/checkpoint-7504",
"epoch": 14.0,
"eval_steps": 500,
"global_step": 7504,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.93,
"learning_rate": 0.01,
"loss": 0.4706,
"step": 500
},
{
"epoch": 1.0,
"eval_accuracy": 0.4269381922115041,
"eval_f1_macro": 0.6875675206337933,
"eval_f1_micro": 0.7388882010647516,
"eval_loss": 0.45332154631614685,
"eval_roc_auc": 0.8315732046841607,
"eval_runtime": 721.2005,
"eval_samples_per_second": 3.881,
"eval_steps_per_second": 0.243,
"learning_rate": 0.01,
"step": 536
},
{
"epoch": 1.87,
"learning_rate": 0.01,
"loss": 0.4045,
"step": 1000
},
{
"epoch": 2.0,
"eval_accuracy": 0.4390853876384423,
"eval_f1_macro": 0.7188299260461222,
"eval_f1_micro": 0.7669365643385565,
"eval_loss": 0.42623651027679443,
"eval_roc_auc": 0.8634183746872706,
"eval_runtime": 732.1356,
"eval_samples_per_second": 3.823,
"eval_steps_per_second": 0.239,
"learning_rate": 0.01,
"step": 1072
},
{
"epoch": 2.8,
"learning_rate": 0.01,
"loss": 0.3973,
"step": 1500
},
{
"epoch": 3.0,
"eval_accuracy": 0.45373347624151483,
"eval_f1_macro": 0.7176495048915421,
"eval_f1_micro": 0.7600811564798376,
"eval_loss": 0.47221142053604126,
"eval_roc_auc": 0.8371515138013037,
"eval_runtime": 722.5503,
"eval_samples_per_second": 3.874,
"eval_steps_per_second": 0.242,
"learning_rate": 0.01,
"step": 1608
},
{
"epoch": 3.73,
"learning_rate": 0.01,
"loss": 0.3961,
"step": 2000
},
{
"epoch": 4.0,
"eval_accuracy": 0.3762057877813505,
"eval_f1_macro": 0.6912769640128906,
"eval_f1_micro": 0.7527505610597187,
"eval_loss": 0.6075000166893005,
"eval_roc_auc": 0.8724199945622111,
"eval_runtime": 721.5253,
"eval_samples_per_second": 3.879,
"eval_steps_per_second": 0.243,
"learning_rate": 0.01,
"step": 2144
},
{
"epoch": 4.66,
"learning_rate": 0.01,
"loss": 0.3751,
"step": 2500
},
{
"epoch": 5.0,
"eval_accuracy": 0.43515541264737406,
"eval_f1_macro": 0.7510721393725562,
"eval_f1_micro": 0.7884265637916782,
"eval_loss": 0.3916389048099518,
"eval_roc_auc": 0.8925436873239415,
"eval_runtime": 719.4104,
"eval_samples_per_second": 3.891,
"eval_steps_per_second": 0.243,
"learning_rate": 0.01,
"step": 2680
},
{
"epoch": 5.6,
"learning_rate": 0.01,
"loss": 0.365,
"step": 3000
},
{
"epoch": 6.0,
"eval_accuracy": 0.4105037513397642,
"eval_f1_macro": 0.7065890629165198,
"eval_f1_micro": 0.7659574468085106,
"eval_loss": 0.5255631804466248,
"eval_roc_auc": 0.8535409911580616,
"eval_runtime": 733.8873,
"eval_samples_per_second": 3.814,
"eval_steps_per_second": 0.238,
"learning_rate": 0.01,
"step": 3216
},
{
"epoch": 6.53,
"learning_rate": 0.01,
"loss": 0.3565,
"step": 3500
},
{
"epoch": 7.0,
"eval_accuracy": 0.41014648088603073,
"eval_f1_macro": 0.6946929982635759,
"eval_f1_micro": 0.7292831407628276,
"eval_loss": 0.5707747936248779,
"eval_roc_auc": 0.8254485110934348,
"eval_runtime": 708.4289,
"eval_samples_per_second": 3.951,
"eval_steps_per_second": 0.247,
"learning_rate": 0.01,
"step": 3752
},
{
"epoch": 7.46,
"learning_rate": 0.01,
"loss": 0.3807,
"step": 4000
},
{
"epoch": 8.0,
"eval_accuracy": 0.45909253304751696,
"eval_f1_macro": 0.7145318802135835,
"eval_f1_micro": 0.7811138014527845,
"eval_loss": 0.47700363397598267,
"eval_roc_auc": 0.8609308266040633,
"eval_runtime": 693.0017,
"eval_samples_per_second": 4.039,
"eval_steps_per_second": 0.253,
"learning_rate": 0.01,
"step": 4288
},
{
"epoch": 8.4,
"learning_rate": 0.01,
"loss": 0.3462,
"step": 4500
},
{
"epoch": 9.0,
"eval_accuracy": 0.4451589853519114,
"eval_f1_macro": 0.7521525226987047,
"eval_f1_micro": 0.7880472550382212,
"eval_loss": 0.4611993134021759,
"eval_roc_auc": 0.8774820735887874,
"eval_runtime": 688.9161,
"eval_samples_per_second": 4.063,
"eval_steps_per_second": 0.254,
"learning_rate": 0.01,
"step": 4824
},
{
"epoch": 9.33,
"learning_rate": 0.01,
"loss": 0.38,
"step": 5000
},
{
"epoch": 10.0,
"eval_accuracy": 0.46123615576991783,
"eval_f1_macro": 0.7516573553144021,
"eval_f1_micro": 0.7942691374104554,
"eval_loss": 0.4558601677417755,
"eval_roc_auc": 0.8746914793507129,
"eval_runtime": 694.0203,
"eval_samples_per_second": 4.033,
"eval_steps_per_second": 0.252,
"learning_rate": 0.01,
"step": 5360
},
{
"epoch": 10.26,
"learning_rate": 0.01,
"loss": 0.3472,
"step": 5500
},
{
"epoch": 11.0,
"eval_accuracy": 0.4040728831725616,
"eval_f1_macro": 0.7314807716558429,
"eval_f1_micro": 0.7708779443254817,
"eval_loss": 0.5080511569976807,
"eval_roc_auc": 0.8980409880768458,
"eval_runtime": 691.0636,
"eval_samples_per_second": 4.05,
"eval_steps_per_second": 0.253,
"learning_rate": 0.01,
"step": 5896
},
{
"epoch": 11.19,
"learning_rate": 0.001,
"loss": 0.3167,
"step": 6000
},
{
"epoch": 12.0,
"eval_accuracy": 0.5141121829224723,
"eval_f1_macro": 0.7990159125560728,
"eval_f1_micro": 0.826813005251667,
"eval_loss": 0.2364267259836197,
"eval_roc_auc": 0.8944540329754681,
"eval_runtime": 711.5081,
"eval_samples_per_second": 3.934,
"eval_steps_per_second": 0.246,
"learning_rate": 0.001,
"step": 6432
},
{
"epoch": 12.13,
"learning_rate": 0.001,
"loss": 0.1322,
"step": 6500
},
{
"epoch": 13.0,
"eval_accuracy": 0.49231868524473027,
"eval_f1_macro": 0.793143079384716,
"eval_f1_micro": 0.8209398474349269,
"eval_loss": 0.22222588956356049,
"eval_roc_auc": 0.8951348180852069,
"eval_runtime": 708.4317,
"eval_samples_per_second": 3.951,
"eval_steps_per_second": 0.247,
"learning_rate": 0.001,
"step": 6968
},
{
"epoch": 13.06,
"learning_rate": 0.001,
"loss": 0.1074,
"step": 7000
},
{
"epoch": 13.99,
"learning_rate": 0.001,
"loss": 0.0958,
"step": 7500
},
{
"epoch": 14.0,
"eval_accuracy": 0.5051804215791355,
"eval_f1_macro": 0.797475400821107,
"eval_f1_micro": 0.8286666276962646,
"eval_loss": 0.20891834795475006,
"eval_roc_auc": 0.8984516760867104,
"eval_runtime": 704.2208,
"eval_samples_per_second": 3.975,
"eval_steps_per_second": 0.249,
"learning_rate": 0.001,
"step": 7504
},
{
"epoch": 14.0,
"learning_rate": 0.001,
"step": 7504,
"total_flos": 3.2978988517930205e+19,
"train_loss": 0.32461663065498064,
"train_runtime": 40410.0145,
"train_samples_per_second": 2.968,
"train_steps_per_second": 0.186
}
],
"logging_steps": 500,
"max_steps": 7504,
"num_train_epochs": 14,
"save_steps": 500,
"total_flos": 3.2978988517930205e+19,
"trial_name": null,
"trial_params": null
}