{ "best_metric": 0.06278952211141586, "best_model_checkpoint": "/vit-base-patch32-224-in21k/checkpoint-217", "epoch": 40.0, "global_step": 280, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.4529457688331604, "eval_runtime": 8.6926, "eval_samples_per_second": 9.088, "eval_steps_per_second": 0.115, "step": 7 }, { "epoch": 1.43, "learning_rate": 1.928571428571429e-05, "loss": 0.5024, "step": 10 }, { "epoch": 2.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.39888718724250793, "eval_runtime": 9.0793, "eval_samples_per_second": 8.701, "eval_steps_per_second": 0.11, "step": 14 }, { "epoch": 2.86, "learning_rate": 1.8571428571428575e-05, "loss": 0.3533, "step": 20 }, { "epoch": 3.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.37408992648124695, "eval_runtime": 9.0097, "eval_samples_per_second": 8.768, "eval_steps_per_second": 0.111, "step": 21 }, { "epoch": 4.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.3160648047924042, "eval_runtime": 9.1428, "eval_samples_per_second": 8.641, "eval_steps_per_second": 0.109, "step": 28 }, { "epoch": 4.29, "learning_rate": 1.785714285714286e-05, "loss": 0.285, "step": 30 }, { "epoch": 5.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.282362163066864, "eval_runtime": 9.1103, "eval_samples_per_second": 8.671, "eval_steps_per_second": 0.11, "step": 35 }, { "epoch": 5.71, "learning_rate": 1.7142857142857142e-05, "loss": 0.2491, "step": 40 }, { "epoch": 6.0, "eval_f1": 0.8607594936708861, "eval_loss": 0.2700817883014679, "eval_runtime": 8.8127, "eval_samples_per_second": 8.964, "eval_steps_per_second": 0.113, "step": 42 }, { "epoch": 7.0, "eval_f1": 0.9113924050632911, "eval_loss": 0.2062235176563263, "eval_runtime": 8.5646, "eval_samples_per_second": 9.224, "eval_steps_per_second": 0.117, "step": 49 }, { "epoch": 7.14, "learning_rate": 1.642857142857143e-05, "loss": 0.2032, "step": 50 }, { "epoch": 8.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.20497918128967285, "eval_runtime": 8.6022, "eval_samples_per_second": 9.184, "eval_steps_per_second": 0.116, "step": 56 }, { "epoch": 8.57, "learning_rate": 1.5714285714285715e-05, "loss": 0.157, "step": 60 }, { "epoch": 9.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.20131482183933258, "eval_runtime": 8.5286, "eval_samples_per_second": 9.263, "eval_steps_per_second": 0.117, "step": 63 }, { "epoch": 10.0, "learning_rate": 1.5000000000000002e-05, "loss": 0.1127, "step": 70 }, { "epoch": 10.0, "eval_f1": 0.9367088607594937, "eval_loss": 0.19601519405841827, "eval_runtime": 8.52, "eval_samples_per_second": 9.272, "eval_steps_per_second": 0.117, "step": 70 }, { "epoch": 11.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.14171478152275085, "eval_runtime": 8.4584, "eval_samples_per_second": 9.34, "eval_steps_per_second": 0.118, "step": 77 }, { "epoch": 11.43, "learning_rate": 1.4285714285714287e-05, "loss": 0.0903, "step": 80 }, { "epoch": 12.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.1306915283203125, "eval_runtime": 8.4328, "eval_samples_per_second": 9.368, "eval_steps_per_second": 0.119, "step": 84 }, { "epoch": 12.86, "learning_rate": 1.3571428571428574e-05, "loss": 0.0922, "step": 90 }, { "epoch": 13.0, "eval_f1": 0.9873417721518988, "eval_loss": 0.08702569454908371, "eval_runtime": 8.8303, "eval_samples_per_second": 8.947, "eval_steps_per_second": 0.113, "step": 91 }, { "epoch": 14.0, "eval_f1": 0.9240506329113924, "eval_loss": 0.20480988919734955, "eval_runtime": 8.6551, "eval_samples_per_second": 9.128, "eval_steps_per_second": 0.116, "step": 98 }, { "epoch": 14.29, "learning_rate": 1.2857142857142859e-05, "loss": 0.0595, "step": 100 }, { "epoch": 15.0, "eval_f1": 0.9620253164556962, "eval_loss": 0.12036000937223434, "eval_runtime": 8.6389, "eval_samples_per_second": 9.145, "eval_steps_per_second": 0.116, "step": 105 }, { "epoch": 15.71, "learning_rate": 1.2142857142857142e-05, "loss": 0.0527, "step": 110 }, { "epoch": 16.0, "eval_f1": 0.9367088607594937, "eval_loss": 0.2552852928638458, "eval_runtime": 8.4875, "eval_samples_per_second": 9.308, "eval_steps_per_second": 0.118, "step": 112 }, { "epoch": 17.0, "eval_f1": 0.9367088607594937, "eval_loss": 0.16753825545310974, "eval_runtime": 8.4252, "eval_samples_per_second": 9.377, "eval_steps_per_second": 0.119, "step": 119 }, { "epoch": 17.14, "learning_rate": 1.1428571428571429e-05, "loss": 0.0477, "step": 120 }, { "epoch": 18.0, "eval_f1": 0.9240506329113924, "eval_loss": 0.22650040686130524, "eval_runtime": 8.392, "eval_samples_per_second": 9.414, "eval_steps_per_second": 0.119, "step": 126 }, { "epoch": 18.57, "learning_rate": 1.0714285714285714e-05, "loss": 0.0411, "step": 130 }, { "epoch": 19.0, "eval_f1": 0.9367088607594937, "eval_loss": 0.1900627613067627, "eval_runtime": 8.4937, "eval_samples_per_second": 9.301, "eval_steps_per_second": 0.118, "step": 133 }, { "epoch": 20.0, "learning_rate": 1e-05, "loss": 0.0299, "step": 140 }, { "epoch": 20.0, "eval_f1": 0.9240506329113924, "eval_loss": 0.2422873079776764, "eval_runtime": 8.7448, "eval_samples_per_second": 9.034, "eval_steps_per_second": 0.114, "step": 140 }, { "epoch": 21.0, "eval_f1": 0.9873417721518988, "eval_loss": 0.06394638121128082, "eval_runtime": 8.7204, "eval_samples_per_second": 9.059, "eval_steps_per_second": 0.115, "step": 147 }, { "epoch": 21.43, "learning_rate": 9.285714285714288e-06, "loss": 0.0487, "step": 150 }, { "epoch": 22.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.12548162043094635, "eval_runtime": 8.9171, "eval_samples_per_second": 8.859, "eval_steps_per_second": 0.112, "step": 154 }, { "epoch": 22.86, "learning_rate": 8.571428571428571e-06, "loss": 0.0359, "step": 160 }, { "epoch": 23.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.12127607315778732, "eval_runtime": 8.6147, "eval_samples_per_second": 9.17, "eval_steps_per_second": 0.116, "step": 161 }, { "epoch": 24.0, "eval_f1": 0.9746835443037974, "eval_loss": 0.07274330407381058, "eval_runtime": 8.5807, "eval_samples_per_second": 9.207, "eval_steps_per_second": 0.117, "step": 168 }, { "epoch": 24.29, "learning_rate": 7.857142857142858e-06, "loss": 0.0302, "step": 170 }, { "epoch": 25.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.11162865161895752, "eval_runtime": 8.6735, "eval_samples_per_second": 9.108, "eval_steps_per_second": 0.115, "step": 175 }, { "epoch": 25.71, "learning_rate": 7.1428571428571436e-06, "loss": 0.0304, "step": 180 }, { "epoch": 26.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.10622164607048035, "eval_runtime": 8.8211, "eval_samples_per_second": 8.956, "eval_steps_per_second": 0.113, "step": 182 }, { "epoch": 27.0, "eval_f1": 0.9240506329113924, "eval_loss": 0.20966486632823944, "eval_runtime": 8.7082, "eval_samples_per_second": 9.072, "eval_steps_per_second": 0.115, "step": 189 }, { "epoch": 27.14, "learning_rate": 6.4285714285714295e-06, "loss": 0.0274, "step": 190 }, { "epoch": 28.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.1276017129421234, "eval_runtime": 8.676, "eval_samples_per_second": 9.106, "eval_steps_per_second": 0.115, "step": 196 }, { "epoch": 28.57, "learning_rate": 5.7142857142857145e-06, "loss": 0.0291, "step": 200 }, { "epoch": 29.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.09670199453830719, "eval_runtime": 8.5086, "eval_samples_per_second": 9.285, "eval_steps_per_second": 0.118, "step": 203 }, { "epoch": 30.0, "learning_rate": 5e-06, "loss": 0.0202, "step": 210 }, { "epoch": 30.0, "eval_f1": 0.9746835443037974, "eval_loss": 0.07649976760149002, "eval_runtime": 8.9676, "eval_samples_per_second": 8.81, "eval_steps_per_second": 0.112, "step": 210 }, { "epoch": 31.0, "eval_f1": 0.9873417721518988, "eval_loss": 0.06278952211141586, "eval_runtime": 8.7232, "eval_samples_per_second": 9.056, "eval_steps_per_second": 0.115, "step": 217 }, { "epoch": 31.43, "learning_rate": 4.2857142857142855e-06, "loss": 0.0232, "step": 220 }, { "epoch": 32.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.13882263004779816, "eval_runtime": 8.6516, "eval_samples_per_second": 9.131, "eval_steps_per_second": 0.116, "step": 224 }, { "epoch": 32.86, "learning_rate": 3.5714285714285718e-06, "loss": 0.0264, "step": 230 }, { "epoch": 33.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.10616844147443771, "eval_runtime": 8.6952, "eval_samples_per_second": 9.085, "eval_steps_per_second": 0.115, "step": 231 }, { "epoch": 34.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.1320488601922989, "eval_runtime": 8.7896, "eval_samples_per_second": 8.988, "eval_steps_per_second": 0.114, "step": 238 }, { "epoch": 34.29, "learning_rate": 2.8571428571428573e-06, "loss": 0.0219, "step": 240 }, { "epoch": 35.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.1528194099664688, "eval_runtime": 8.8303, "eval_samples_per_second": 8.946, "eval_steps_per_second": 0.113, "step": 245 }, { "epoch": 35.71, "learning_rate": 2.1428571428571427e-06, "loss": 0.0194, "step": 250 }, { "epoch": 36.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.1746273934841156, "eval_runtime": 8.7909, "eval_samples_per_second": 8.987, "eval_steps_per_second": 0.114, "step": 252 }, { "epoch": 37.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.16089513897895813, "eval_runtime": 8.8367, "eval_samples_per_second": 8.94, "eval_steps_per_second": 0.113, "step": 259 }, { "epoch": 37.14, "learning_rate": 1.4285714285714286e-06, "loss": 0.0204, "step": 260 }, { "epoch": 38.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.14817634224891663, "eval_runtime": 8.8819, "eval_samples_per_second": 8.895, "eval_steps_per_second": 0.113, "step": 266 }, { "epoch": 38.57, "learning_rate": 7.142857142857143e-07, "loss": 0.0217, "step": 270 }, { "epoch": 39.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.152223601937294, "eval_runtime": 8.8958, "eval_samples_per_second": 8.881, "eval_steps_per_second": 0.112, "step": 273 }, { "epoch": 40.0, "learning_rate": 0.0, "loss": 0.0216, "step": 280 }, { "epoch": 40.0, "eval_f1": 0.9493670886075949, "eval_loss": 0.14991530776023865, "eval_runtime": 8.6557, "eval_samples_per_second": 9.127, "eval_steps_per_second": 0.116, "step": 280 }, { "epoch": 40.0, "step": 280, "total_flos": 1.4091487038849024e+18, "train_loss": 0.09473916946777276, "train_runtime": 2533.8499, "train_samples_per_second": 7.041, "train_steps_per_second": 0.111 } ], "max_steps": 280, "num_train_epochs": 40, "total_flos": 1.4091487038849024e+18, "trial_name": null, "trial_params": null }