|
{ |
|
"best_metric": 0.9985872380503885, |
|
"best_model_checkpoint": "vit_base_aihub_model_py/checkpoint-745", |
|
"epoch": 4.983277591973244, |
|
"global_step": 745, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.3773, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 1.2997, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2e-05, |
|
"loss": 1.134, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.9478, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.7246, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4e-05, |
|
"loss": 0.5165, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.3778, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.9626865671641794e-05, |
|
"loss": 0.2721, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.888059701492538e-05, |
|
"loss": 0.217, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.813432835820896e-05, |
|
"loss": 0.186, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.738805970149254e-05, |
|
"loss": 0.1654, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.664179104477612e-05, |
|
"loss": 0.1533, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.58955223880597e-05, |
|
"loss": 0.1389, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.5149253731343286e-05, |
|
"loss": 0.1235, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9857546503414175, |
|
"eval_f1": 0.9829586496374041, |
|
"eval_loss": 0.09358736127614975, |
|
"eval_precision": 0.9845287058827537, |
|
"eval_recall": 0.9814155430620309, |
|
"eval_runtime": 111.262, |
|
"eval_samples_per_second": 76.342, |
|
"eval_steps_per_second": 0.602, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.440298507462687e-05, |
|
"loss": 0.1106, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.3656716417910446e-05, |
|
"loss": 0.1162, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.2910447761194036e-05, |
|
"loss": 0.1059, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.216417910447761e-05, |
|
"loss": 0.1115, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.1417910447761195e-05, |
|
"loss": 0.0924, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.067164179104478e-05, |
|
"loss": 0.0972, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.992537313432836e-05, |
|
"loss": 0.0909, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.9179104477611945e-05, |
|
"loss": 0.0977, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.843283582089552e-05, |
|
"loss": 0.0931, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.7686567164179104e-05, |
|
"loss": 0.0756, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.694029850746269e-05, |
|
"loss": 0.083, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.619402985074627e-05, |
|
"loss": 0.0751, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 3.5447761194029854e-05, |
|
"loss": 0.0666, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.470149253731344e-05, |
|
"loss": 0.0673, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.395522388059701e-05, |
|
"loss": 0.067, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.987756063103367, |
|
"eval_f1": 0.9859294798786484, |
|
"eval_loss": 0.06216855347156525, |
|
"eval_precision": 0.990943040361106, |
|
"eval_recall": 0.9812800326186175, |
|
"eval_runtime": 108.9574, |
|
"eval_samples_per_second": 77.957, |
|
"eval_steps_per_second": 0.615, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.32089552238806e-05, |
|
"loss": 0.0754, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 3.246268656716418e-05, |
|
"loss": 0.0657, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 3.171641791044776e-05, |
|
"loss": 0.0683, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.0970149253731346e-05, |
|
"loss": 0.0569, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.0223880597014926e-05, |
|
"loss": 0.0515, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.9477611940298512e-05, |
|
"loss": 0.0557, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.8731343283582092e-05, |
|
"loss": 0.0558, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.7985074626865672e-05, |
|
"loss": 0.0592, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.7238805970149255e-05, |
|
"loss": 0.0502, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.6492537313432835e-05, |
|
"loss": 0.0567, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.574626865671642e-05, |
|
"loss": 0.0539, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0512, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.4253731343283584e-05, |
|
"loss": 0.0544, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.3507462686567168e-05, |
|
"loss": 0.0561, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.2761194029850747e-05, |
|
"loss": 0.049, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9968212856133741, |
|
"eval_f1": 0.9964206807723841, |
|
"eval_loss": 0.03217490762472153, |
|
"eval_precision": 0.9969469418365854, |
|
"eval_recall": 0.99589824183686, |
|
"eval_runtime": 106.3062, |
|
"eval_samples_per_second": 79.901, |
|
"eval_steps_per_second": 0.63, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.201492537313433e-05, |
|
"loss": 0.0493, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 2.126865671641791e-05, |
|
"loss": 0.0544, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 2.0522388059701493e-05, |
|
"loss": 0.0495, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.9776119402985073e-05, |
|
"loss": 0.044, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.9029850746268656e-05, |
|
"loss": 0.0452, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 1.828358208955224e-05, |
|
"loss": 0.0437, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.7537313432835823e-05, |
|
"loss": 0.0374, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.6791044776119406e-05, |
|
"loss": 0.0389, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.6044776119402986e-05, |
|
"loss": 0.0321, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.529850746268657e-05, |
|
"loss": 0.0347, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.455223880597015e-05, |
|
"loss": 0.0359, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.3805970149253733e-05, |
|
"loss": 0.0369, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.3059701492537313e-05, |
|
"loss": 0.0367, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 1.2313432835820896e-05, |
|
"loss": 0.0353, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 1.1567164179104478e-05, |
|
"loss": 0.0477, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9977631269131152, |
|
"eval_f1": 0.997511338692231, |
|
"eval_loss": 0.024851497262716293, |
|
"eval_precision": 0.9985286326587551, |
|
"eval_recall": 0.9965020764725261, |
|
"eval_runtime": 103.7864, |
|
"eval_samples_per_second": 81.841, |
|
"eval_steps_per_second": 0.646, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 1.082089552238806e-05, |
|
"loss": 0.0382, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.0074626865671643e-05, |
|
"loss": 0.0347, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 9.328358208955226e-06, |
|
"loss": 0.0304, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 8.582089552238805e-06, |
|
"loss": 0.0373, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 7.835820895522389e-06, |
|
"loss": 0.0311, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 7.08955223880597e-06, |
|
"loss": 0.0332, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 6.343283582089552e-06, |
|
"loss": 0.0334, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 5.597014925373135e-06, |
|
"loss": 0.0329, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 4.850746268656717e-06, |
|
"loss": 0.0332, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 4.1044776119402985e-06, |
|
"loss": 0.0303, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 3.358208955223881e-06, |
|
"loss": 0.0276, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2.6119402985074627e-06, |
|
"loss": 0.0332, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.8656716417910446e-06, |
|
"loss": 0.0317, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 1.119402985074627e-06, |
|
"loss": 0.0315, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 3.7313432835820895e-07, |
|
"loss": 0.0336, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"eval_accuracy": 0.9985872380503885, |
|
"eval_f1": 0.9985770990024514, |
|
"eval_loss": 0.021681277081370354, |
|
"eval_precision": 0.9989954885489135, |
|
"eval_recall": 0.998161142953993, |
|
"eval_runtime": 106.6587, |
|
"eval_samples_per_second": 79.637, |
|
"eval_steps_per_second": 0.628, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"step": 745, |
|
"total_flos": 2.9526217173796848e+19, |
|
"train_loss": 0.14726725356690837, |
|
"train_runtime": 5241.8792, |
|
"train_samples_per_second": 72.911, |
|
"train_steps_per_second": 0.142 |
|
} |
|
], |
|
"max_steps": 745, |
|
"num_train_epochs": 5, |
|
"total_flos": 2.9526217173796848e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|