{ "best_metric": 0.7666666666666667, "best_model_checkpoint": "swinv2-tiny-patch4-window8-256-dmae-va-U5-42C/checkpoint-232", "epoch": 37.935483870967744, "eval_steps": 500, "global_step": 294, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9032258064516129, "eval_accuracy": 0.35, "eval_loss": 1.3925895690917969, "eval_runtime": 1.9549, "eval_samples_per_second": 30.693, "eval_steps_per_second": 1.023, "step": 7 }, { "epoch": 1.2903225806451613, "grad_norm": 3.2569632530212402, "learning_rate": 3.3333333333333333e-06, "loss": 1.4087, "step": 10 }, { "epoch": 1.935483870967742, "eval_accuracy": 0.4166666666666667, "eval_loss": 1.3364887237548828, "eval_runtime": 1.315, "eval_samples_per_second": 45.628, "eval_steps_per_second": 1.521, "step": 15 }, { "epoch": 2.5806451612903225, "grad_norm": 2.8684847354888916, "learning_rate": 6.666666666666667e-06, "loss": 1.3807, "step": 20 }, { "epoch": 2.967741935483871, "eval_accuracy": 0.4166666666666667, "eval_loss": 1.2813018560409546, "eval_runtime": 1.3067, "eval_samples_per_second": 45.917, "eval_steps_per_second": 1.531, "step": 23 }, { "epoch": 3.870967741935484, "grad_norm": 3.6313109397888184, "learning_rate": 1e-05, "loss": 1.35, "step": 30 }, { "epoch": 4.0, "eval_accuracy": 0.4, "eval_loss": 1.2406634092330933, "eval_runtime": 1.3069, "eval_samples_per_second": 45.909, "eval_steps_per_second": 1.53, "step": 31 }, { "epoch": 4.903225806451613, "eval_accuracy": 0.48333333333333334, "eval_loss": 1.2116148471832275, "eval_runtime": 1.3014, "eval_samples_per_second": 46.103, "eval_steps_per_second": 1.537, "step": 38 }, { "epoch": 5.161290322580645, "grad_norm": 3.7117483615875244, "learning_rate": 9.621212121212122e-06, "loss": 1.2933, "step": 40 }, { "epoch": 5.935483870967742, "eval_accuracy": 0.48333333333333334, "eval_loss": 1.165272831916809, "eval_runtime": 1.3337, "eval_samples_per_second": 44.987, "eval_steps_per_second": 1.5, "step": 46 }, { "epoch": 6.451612903225806, "grad_norm": 4.500431060791016, "learning_rate": 9.242424242424244e-06, "loss": 1.2426, "step": 50 }, { "epoch": 6.967741935483871, "eval_accuracy": 0.5166666666666667, "eval_loss": 1.115120768547058, "eval_runtime": 1.3716, "eval_samples_per_second": 43.745, "eval_steps_per_second": 1.458, "step": 54 }, { "epoch": 7.741935483870968, "grad_norm": 4.069227695465088, "learning_rate": 8.863636363636365e-06, "loss": 1.1771, "step": 60 }, { "epoch": 8.0, "eval_accuracy": 0.6, "eval_loss": 1.0440610647201538, "eval_runtime": 1.9614, "eval_samples_per_second": 30.59, "eval_steps_per_second": 1.02, "step": 62 }, { "epoch": 8.903225806451612, "eval_accuracy": 0.5666666666666667, "eval_loss": 0.9990090727806091, "eval_runtime": 1.6529, "eval_samples_per_second": 36.299, "eval_steps_per_second": 1.21, "step": 69 }, { "epoch": 9.03225806451613, "grad_norm": 6.175364971160889, "learning_rate": 8.484848484848486e-06, "loss": 1.0983, "step": 70 }, { "epoch": 9.935483870967742, "eval_accuracy": 0.6333333333333333, "eval_loss": 0.9455800652503967, "eval_runtime": 1.336, "eval_samples_per_second": 44.911, "eval_steps_per_second": 1.497, "step": 77 }, { "epoch": 10.32258064516129, "grad_norm": 5.833083152770996, "learning_rate": 8.106060606060606e-06, "loss": 1.0338, "step": 80 }, { "epoch": 10.967741935483872, "eval_accuracy": 0.6833333333333333, "eval_loss": 0.9160192012786865, "eval_runtime": 1.3605, "eval_samples_per_second": 44.101, "eval_steps_per_second": 1.47, "step": 85 }, { "epoch": 11.612903225806452, "grad_norm": 7.081052303314209, "learning_rate": 7.727272727272727e-06, "loss": 0.9665, "step": 90 }, { "epoch": 12.0, "eval_accuracy": 0.6833333333333333, "eval_loss": 0.8940223455429077, "eval_runtime": 1.6293, "eval_samples_per_second": 36.825, "eval_steps_per_second": 1.228, "step": 93 }, { "epoch": 12.903225806451612, "grad_norm": 10.133336067199707, "learning_rate": 7.348484848484849e-06, "loss": 0.9133, "step": 100 }, { "epoch": 12.903225806451612, "eval_accuracy": 0.6, "eval_loss": 0.8753000497817993, "eval_runtime": 1.851, "eval_samples_per_second": 32.415, "eval_steps_per_second": 1.081, "step": 100 }, { "epoch": 13.935483870967742, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.8517557978630066, "eval_runtime": 1.7325, "eval_samples_per_second": 34.633, "eval_steps_per_second": 1.154, "step": 108 }, { "epoch": 14.193548387096774, "grad_norm": 7.324928283691406, "learning_rate": 6.969696969696971e-06, "loss": 0.8521, "step": 110 }, { "epoch": 14.967741935483872, "eval_accuracy": 0.65, "eval_loss": 0.8514594435691833, "eval_runtime": 1.3473, "eval_samples_per_second": 44.535, "eval_steps_per_second": 1.484, "step": 116 }, { "epoch": 15.483870967741936, "grad_norm": 8.610685348510742, "learning_rate": 6.590909090909091e-06, "loss": 0.8461, "step": 120 }, { "epoch": 16.0, "eval_accuracy": 0.65, "eval_loss": 0.8406927585601807, "eval_runtime": 1.3128, "eval_samples_per_second": 45.703, "eval_steps_per_second": 1.523, "step": 124 }, { "epoch": 16.774193548387096, "grad_norm": 8.986543655395508, "learning_rate": 6.212121212121213e-06, "loss": 0.808, "step": 130 }, { "epoch": 16.903225806451612, "eval_accuracy": 0.65, "eval_loss": 0.8217867016792297, "eval_runtime": 1.3566, "eval_samples_per_second": 44.227, "eval_steps_per_second": 1.474, "step": 131 }, { "epoch": 17.93548387096774, "eval_accuracy": 0.6833333333333333, "eval_loss": 0.817019522190094, "eval_runtime": 1.3417, "eval_samples_per_second": 44.72, "eval_steps_per_second": 1.491, "step": 139 }, { "epoch": 18.06451612903226, "grad_norm": 14.52387523651123, "learning_rate": 5.833333333333334e-06, "loss": 0.7779, "step": 140 }, { "epoch": 18.967741935483872, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7972093224525452, "eval_runtime": 1.333, "eval_samples_per_second": 45.011, "eval_steps_per_second": 1.5, "step": 147 }, { "epoch": 19.35483870967742, "grad_norm": 9.242515563964844, "learning_rate": 5.4545454545454545e-06, "loss": 0.758, "step": 150 }, { "epoch": 20.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.7816523313522339, "eval_runtime": 1.3563, "eval_samples_per_second": 44.24, "eval_steps_per_second": 1.475, "step": 155 }, { "epoch": 20.64516129032258, "grad_norm": 10.172256469726562, "learning_rate": 5.075757575757576e-06, "loss": 0.7416, "step": 160 }, { "epoch": 20.903225806451612, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7677698135375977, "eval_runtime": 1.3666, "eval_samples_per_second": 43.906, "eval_steps_per_second": 1.464, "step": 162 }, { "epoch": 21.93548387096774, "grad_norm": 9.232048988342285, "learning_rate": 4.696969696969698e-06, "loss": 0.7344, "step": 170 }, { "epoch": 21.93548387096774, "eval_accuracy": 0.7166666666666667, "eval_loss": 0.7650207281112671, "eval_runtime": 1.3874, "eval_samples_per_second": 43.248, "eval_steps_per_second": 1.442, "step": 170 }, { "epoch": 22.967741935483872, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.7427585124969482, "eval_runtime": 1.3402, "eval_samples_per_second": 44.768, "eval_steps_per_second": 1.492, "step": 178 }, { "epoch": 23.225806451612904, "grad_norm": 8.738112449645996, "learning_rate": 4.3181818181818185e-06, "loss": 0.7091, "step": 180 }, { "epoch": 24.0, "eval_accuracy": 0.75, "eval_loss": 0.7280309200286865, "eval_runtime": 1.3549, "eval_samples_per_second": 44.284, "eval_steps_per_second": 1.476, "step": 186 }, { "epoch": 24.516129032258064, "grad_norm": 9.839088439941406, "learning_rate": 3.93939393939394e-06, "loss": 0.6876, "step": 190 }, { "epoch": 24.903225806451612, "eval_accuracy": 0.75, "eval_loss": 0.7235161066055298, "eval_runtime": 1.346, "eval_samples_per_second": 44.577, "eval_steps_per_second": 1.486, "step": 193 }, { "epoch": 25.806451612903224, "grad_norm": 9.253539085388184, "learning_rate": 3.560606060606061e-06, "loss": 0.6887, "step": 200 }, { "epoch": 25.93548387096774, "eval_accuracy": 0.75, "eval_loss": 0.7277832627296448, "eval_runtime": 1.4422, "eval_samples_per_second": 41.604, "eval_steps_per_second": 1.387, "step": 201 }, { "epoch": 26.967741935483872, "eval_accuracy": 0.75, "eval_loss": 0.7264487147331238, "eval_runtime": 1.3795, "eval_samples_per_second": 43.494, "eval_steps_per_second": 1.45, "step": 209 }, { "epoch": 27.096774193548388, "grad_norm": 9.031746864318848, "learning_rate": 3.181818181818182e-06, "loss": 0.6897, "step": 210 }, { "epoch": 28.0, "eval_accuracy": 0.75, "eval_loss": 0.7228216528892517, "eval_runtime": 1.5018, "eval_samples_per_second": 39.952, "eval_steps_per_second": 1.332, "step": 217 }, { "epoch": 28.387096774193548, "grad_norm": 9.238479614257812, "learning_rate": 2.803030303030303e-06, "loss": 0.6637, "step": 220 }, { "epoch": 28.903225806451612, "eval_accuracy": 0.75, "eval_loss": 0.7162961363792419, "eval_runtime": 1.8314, "eval_samples_per_second": 32.762, "eval_steps_per_second": 1.092, "step": 224 }, { "epoch": 29.677419354838708, "grad_norm": 11.410670280456543, "learning_rate": 2.4242424242424244e-06, "loss": 0.6924, "step": 230 }, { "epoch": 29.93548387096774, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.7072709798812866, "eval_runtime": 1.7495, "eval_samples_per_second": 34.295, "eval_steps_per_second": 1.143, "step": 232 }, { "epoch": 30.967741935483872, "grad_norm": 9.04150676727295, "learning_rate": 2.0454545454545457e-06, "loss": 0.6234, "step": 240 }, { "epoch": 30.967741935483872, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.7057181596755981, "eval_runtime": 1.7336, "eval_samples_per_second": 34.609, "eval_steps_per_second": 1.154, "step": 240 }, { "epoch": 32.0, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.7090018391609192, "eval_runtime": 1.9304, "eval_samples_per_second": 31.081, "eval_steps_per_second": 1.036, "step": 248 }, { "epoch": 32.25806451612903, "grad_norm": 10.956854820251465, "learning_rate": 1.6666666666666667e-06, "loss": 0.6652, "step": 250 }, { "epoch": 32.903225806451616, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.7051603198051453, "eval_runtime": 2.0298, "eval_samples_per_second": 29.559, "eval_steps_per_second": 0.985, "step": 255 }, { "epoch": 33.54838709677419, "grad_norm": 11.52700424194336, "learning_rate": 1.287878787878788e-06, "loss": 0.6343, "step": 260 }, { "epoch": 33.935483870967744, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.7009490132331848, "eval_runtime": 1.5868, "eval_samples_per_second": 37.811, "eval_steps_per_second": 1.26, "step": 263 }, { "epoch": 34.83870967741935, "grad_norm": 10.979878425598145, "learning_rate": 9.090909090909091e-07, "loss": 0.6327, "step": 270 }, { "epoch": 34.96774193548387, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.7016925811767578, "eval_runtime": 1.3715, "eval_samples_per_second": 43.748, "eval_steps_per_second": 1.458, "step": 271 }, { "epoch": 36.0, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.7023322582244873, "eval_runtime": 1.3676, "eval_samples_per_second": 43.872, "eval_steps_per_second": 1.462, "step": 279 }, { "epoch": 36.12903225806452, "grad_norm": 8.543764114379883, "learning_rate": 5.303030303030304e-07, "loss": 0.6339, "step": 280 }, { "epoch": 36.903225806451616, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.7027052044868469, "eval_runtime": 1.3882, "eval_samples_per_second": 43.221, "eval_steps_per_second": 1.441, "step": 286 }, { "epoch": 37.41935483870968, "grad_norm": 10.100813865661621, "learning_rate": 1.5151515151515152e-07, "loss": 0.6275, "step": 290 }, { "epoch": 37.935483870967744, "eval_accuracy": 0.7666666666666667, "eval_loss": 0.7030532956123352, "eval_runtime": 1.3783, "eval_samples_per_second": 43.53, "eval_steps_per_second": 1.451, "step": 294 }, { "epoch": 37.935483870967744, "step": 294, "total_flos": 1.2027310550050406e+18, "train_loss": 0.8700437156521544, "train_runtime": 913.4545, "train_samples_per_second": 44.784, "train_steps_per_second": 0.322 } ], "logging_steps": 10, "max_steps": 294, "num_input_tokens_seen": 0, "num_train_epochs": 42, "save_steps": 500, "total_flos": 1.2027310550050406e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }