{ "best_metric": 1.555110216140747, "best_model_checkpoint": "vit-base-patch16-224-in21k/checkpoint-1450", "epoch": 50.0, "eval_steps": 500, "global_step": 1450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.14214463840399003, "eval_loss": 2.1949472427368164, "eval_runtime": 16.6682, "eval_samples_per_second": 72.173, "eval_steps_per_second": 2.28, "step": 29 }, { "epoch": 2.0, "eval_accuracy": 0.16874480465502908, "eval_loss": 2.186838388442993, "eval_runtime": 16.7522, "eval_samples_per_second": 71.811, "eval_steps_per_second": 2.268, "step": 58 }, { "epoch": 3.0, "eval_accuracy": 0.18952618453865336, "eval_loss": 2.1766607761383057, "eval_runtime": 16.8234, "eval_samples_per_second": 71.508, "eval_steps_per_second": 2.259, "step": 87 }, { "epoch": 4.0, "eval_accuracy": 0.2144638403990025, "eval_loss": 2.165771722793579, "eval_runtime": 16.9209, "eval_samples_per_second": 71.096, "eval_steps_per_second": 2.246, "step": 116 }, { "epoch": 5.0, "eval_accuracy": 0.2427265170407315, "eval_loss": 2.153604030609131, "eval_runtime": 16.8242, "eval_samples_per_second": 71.504, "eval_steps_per_second": 2.259, "step": 145 }, { "epoch": 6.0, "eval_accuracy": 0.2809642560266002, "eval_loss": 2.1398112773895264, "eval_runtime": 16.8785, "eval_samples_per_second": 71.274, "eval_steps_per_second": 2.251, "step": 174 }, { "epoch": 7.0, "eval_accuracy": 0.31504571903574397, "eval_loss": 2.1249942779541016, "eval_runtime": 16.7869, "eval_samples_per_second": 71.663, "eval_steps_per_second": 2.264, "step": 203 }, { "epoch": 8.0, "eval_accuracy": 0.3516209476309227, "eval_loss": 2.1092069149017334, "eval_runtime": 16.6708, "eval_samples_per_second": 72.162, "eval_steps_per_second": 2.279, "step": 232 }, { "epoch": 9.0, "eval_accuracy": 0.3865336658354115, "eval_loss": 2.0922772884368896, "eval_runtime": 16.7596, "eval_samples_per_second": 71.78, "eval_steps_per_second": 2.267, "step": 261 }, { "epoch": 10.0, "eval_accuracy": 0.4172901080631754, "eval_loss": 2.0741829872131348, "eval_runtime": 17.2756, "eval_samples_per_second": 69.636, "eval_steps_per_second": 2.2, "step": 290 }, { "epoch": 11.0, "eval_accuracy": 0.45054031587697424, "eval_loss": 2.0549581050872803, "eval_runtime": 16.8587, "eval_samples_per_second": 71.358, "eval_steps_per_second": 2.254, "step": 319 }, { "epoch": 12.0, "eval_accuracy": 0.4746467165419784, "eval_loss": 2.0356059074401855, "eval_runtime": 16.8686, "eval_samples_per_second": 71.316, "eval_steps_per_second": 2.253, "step": 348 }, { "epoch": 13.0, "eval_accuracy": 0.5103906899418121, "eval_loss": 2.0160348415374756, "eval_runtime": 16.8138, "eval_samples_per_second": 71.548, "eval_steps_per_second": 2.26, "step": 377 }, { "epoch": 14.0, "eval_accuracy": 0.5278470490440565, "eval_loss": 1.996354341506958, "eval_runtime": 16.8252, "eval_samples_per_second": 71.5, "eval_steps_per_second": 2.259, "step": 406 }, { "epoch": 15.0, "eval_accuracy": 0.5411471321695761, "eval_loss": 1.9763015508651733, "eval_runtime": 16.7531, "eval_samples_per_second": 71.808, "eval_steps_per_second": 2.268, "step": 435 }, { "epoch": 16.0, "eval_accuracy": 0.5627597672485453, "eval_loss": 1.9563554525375366, "eval_runtime": 16.7782, "eval_samples_per_second": 71.7, "eval_steps_per_second": 2.265, "step": 464 }, { "epoch": 17.0, "eval_accuracy": 0.5935162094763092, "eval_loss": 1.9363466501235962, "eval_runtime": 16.8283, "eval_samples_per_second": 71.487, "eval_steps_per_second": 2.258, "step": 493 }, { "epoch": 17.24137931034483, "grad_norm": 1.170919418334961, "learning_rate": 1.3571428571428572e-06, "loss": 2.0616, "step": 500 }, { "epoch": 18.0, "eval_accuracy": 0.6059850374064838, "eval_loss": 1.9157662391662598, "eval_runtime": 16.8442, "eval_samples_per_second": 71.419, "eval_steps_per_second": 2.256, "step": 522 }, { "epoch": 19.0, "eval_accuracy": 0.6184538653366584, "eval_loss": 1.8954347372055054, "eval_runtime": 16.8002, "eval_samples_per_second": 71.606, "eval_steps_per_second": 2.262, "step": 551 }, { "epoch": 20.0, "eval_accuracy": 0.6275976724854531, "eval_loss": 1.87549889087677, "eval_runtime": 16.9097, "eval_samples_per_second": 71.143, "eval_steps_per_second": 2.247, "step": 580 }, { "epoch": 21.0, "eval_accuracy": 0.6433915211970075, "eval_loss": 1.856285572052002, "eval_runtime": 16.8322, "eval_samples_per_second": 71.47, "eval_steps_per_second": 2.258, "step": 609 }, { "epoch": 22.0, "eval_accuracy": 0.6525353283458022, "eval_loss": 1.8366234302520752, "eval_runtime": 16.8751, "eval_samples_per_second": 71.289, "eval_steps_per_second": 2.252, "step": 638 }, { "epoch": 23.0, "eval_accuracy": 0.6600166251039069, "eval_loss": 1.8174903392791748, "eval_runtime": 16.8399, "eval_samples_per_second": 71.437, "eval_steps_per_second": 2.257, "step": 667 }, { "epoch": 24.0, "eval_accuracy": 0.6699916874480466, "eval_loss": 1.799251914024353, "eval_runtime": 16.95, "eval_samples_per_second": 70.973, "eval_steps_per_second": 2.242, "step": 696 }, { "epoch": 25.0, "eval_accuracy": 0.684123025768911, "eval_loss": 1.7816163301467896, "eval_runtime": 17.3768, "eval_samples_per_second": 69.23, "eval_steps_per_second": 2.187, "step": 725 }, { "epoch": 26.0, "eval_accuracy": 0.6965918536990856, "eval_loss": 1.7638013362884521, "eval_runtime": 16.8626, "eval_samples_per_second": 71.341, "eval_steps_per_second": 2.254, "step": 754 }, { "epoch": 27.0, "eval_accuracy": 0.6982543640897756, "eval_loss": 1.7467155456542969, "eval_runtime": 16.6815, "eval_samples_per_second": 72.116, "eval_steps_per_second": 2.278, "step": 783 }, { "epoch": 28.0, "eval_accuracy": 0.7024106400665004, "eval_loss": 1.730850100517273, "eval_runtime": 16.731, "eval_samples_per_second": 71.902, "eval_steps_per_second": 2.271, "step": 812 }, { "epoch": 29.0, "eval_accuracy": 0.7090606816292602, "eval_loss": 1.714859127998352, "eval_runtime": 16.8823, "eval_samples_per_second": 71.258, "eval_steps_per_second": 2.251, "step": 841 }, { "epoch": 30.0, "eval_accuracy": 0.7182044887780549, "eval_loss": 1.6998004913330078, "eval_runtime": 16.9754, "eval_samples_per_second": 70.867, "eval_steps_per_second": 2.239, "step": 870 }, { "epoch": 31.0, "eval_accuracy": 0.7240232751454697, "eval_loss": 1.6857184171676636, "eval_runtime": 16.864, "eval_samples_per_second": 71.335, "eval_steps_per_second": 2.253, "step": 899 }, { "epoch": 32.0, "eval_accuracy": 0.7339983374896093, "eval_loss": 1.6722785234451294, "eval_runtime": 17.0525, "eval_samples_per_second": 70.547, "eval_steps_per_second": 2.228, "step": 928 }, { "epoch": 33.0, "eval_accuracy": 0.7348295926849543, "eval_loss": 1.6595127582550049, "eval_runtime": 17.129, "eval_samples_per_second": 70.232, "eval_steps_per_second": 2.218, "step": 957 }, { "epoch": 34.0, "eval_accuracy": 0.7389858686616791, "eval_loss": 1.6473374366760254, "eval_runtime": 16.9347, "eval_samples_per_second": 71.037, "eval_steps_per_second": 2.244, "step": 986 }, { "epoch": 34.48275862068966, "grad_norm": 1.199028491973877, "learning_rate": 6.428571428571429e-07, "loss": 1.7089, "step": 1000 }, { "epoch": 35.0, "eval_accuracy": 0.741479634247714, "eval_loss": 1.6358741521835327, "eval_runtime": 16.7459, "eval_samples_per_second": 71.839, "eval_steps_per_second": 2.269, "step": 1015 }, { "epoch": 36.0, "eval_accuracy": 0.744804655029094, "eval_loss": 1.6255922317504883, "eval_runtime": 16.7298, "eval_samples_per_second": 71.907, "eval_steps_per_second": 2.271, "step": 1044 }, { "epoch": 37.0, "eval_accuracy": 0.7506234413965087, "eval_loss": 1.6154671907424927, "eval_runtime": 16.6855, "eval_samples_per_second": 72.098, "eval_steps_per_second": 2.277, "step": 1073 }, { "epoch": 38.0, "eval_accuracy": 0.7522859517871987, "eval_loss": 1.6062015295028687, "eval_runtime": 16.7723, "eval_samples_per_second": 71.725, "eval_steps_per_second": 2.266, "step": 1102 }, { "epoch": 39.0, "eval_accuracy": 0.7531172069825436, "eval_loss": 1.5981698036193848, "eval_runtime": 16.7879, "eval_samples_per_second": 71.659, "eval_steps_per_second": 2.264, "step": 1131 }, { "epoch": 40.0, "eval_accuracy": 0.7564422277639236, "eval_loss": 1.5905556678771973, "eval_runtime": 16.7358, "eval_samples_per_second": 71.882, "eval_steps_per_second": 2.271, "step": 1160 }, { "epoch": 41.0, "eval_accuracy": 0.7581047381546134, "eval_loss": 1.5838948488235474, "eval_runtime": 16.7294, "eval_samples_per_second": 71.909, "eval_steps_per_second": 2.271, "step": 1189 }, { "epoch": 42.0, "eval_accuracy": 0.7622610141313383, "eval_loss": 1.5777734518051147, "eval_runtime": 16.7829, "eval_samples_per_second": 71.68, "eval_steps_per_second": 2.264, "step": 1218 }, { "epoch": 43.0, "eval_accuracy": 0.7605985037406484, "eval_loss": 1.5723505020141602, "eval_runtime": 16.8898, "eval_samples_per_second": 71.226, "eval_steps_per_second": 2.25, "step": 1247 }, { "epoch": 44.0, "eval_accuracy": 0.7622610141313383, "eval_loss": 1.5677645206451416, "eval_runtime": 16.9782, "eval_samples_per_second": 70.856, "eval_steps_per_second": 2.238, "step": 1276 }, { "epoch": 45.0, "eval_accuracy": 0.7630922693266833, "eval_loss": 1.5637906789779663, "eval_runtime": 16.962, "eval_samples_per_second": 70.923, "eval_steps_per_second": 2.24, "step": 1305 }, { "epoch": 46.0, "eval_accuracy": 0.7672485453034081, "eval_loss": 1.560693383216858, "eval_runtime": 16.8368, "eval_samples_per_second": 71.451, "eval_steps_per_second": 2.257, "step": 1334 }, { "epoch": 47.0, "eval_accuracy": 0.7672485453034081, "eval_loss": 1.558199405670166, "eval_runtime": 16.8053, "eval_samples_per_second": 71.585, "eval_steps_per_second": 2.261, "step": 1363 }, { "epoch": 48.0, "eval_accuracy": 0.7689110556940981, "eval_loss": 1.5564790964126587, "eval_runtime": 16.8812, "eval_samples_per_second": 71.263, "eval_steps_per_second": 2.251, "step": 1392 }, { "epoch": 49.0, "eval_accuracy": 0.7689110556940981, "eval_loss": 1.5554701089859009, "eval_runtime": 16.7556, "eval_samples_per_second": 71.797, "eval_steps_per_second": 2.268, "step": 1421 }, { "epoch": 50.0, "eval_accuracy": 0.7689110556940981, "eval_loss": 1.555110216140747, "eval_runtime": 16.8736, "eval_samples_per_second": 71.295, "eval_steps_per_second": 2.252, "step": 1450 } ], "logging_steps": 500, "max_steps": 1450, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.98634115081943e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }