{ "best_metric": 0.8297674418604651, "best_model_checkpoint": "resnet-18-finetuned-resnet-18-1/checkpoint-450", "epoch": 49.95238095238095, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.95, "learning_rate": 1e-05, "loss": 5.5547, "step": 10 }, { "epoch": 0.95, "eval_accuracy": 0.004186046511627907, "eval_loss": 5.4745025634765625, "eval_runtime": 6.1733, "eval_samples_per_second": 348.276, "eval_steps_per_second": 0.81, "step": 10 }, { "epoch": 1.95, "learning_rate": 2e-05, "loss": 5.668, "step": 20 }, { "epoch": 1.95, "eval_accuracy": 0.019069767441860466, "eval_loss": 5.200634479522705, "eval_runtime": 5.9989, "eval_samples_per_second": 358.401, "eval_steps_per_second": 0.833, "step": 20 }, { "epoch": 2.95, "learning_rate": 3e-05, "loss": 5.3055, "step": 30 }, { "epoch": 2.95, "eval_accuracy": 0.09023255813953489, "eval_loss": 4.731231212615967, "eval_runtime": 5.9716, "eval_samples_per_second": 360.039, "eval_steps_per_second": 0.837, "step": 30 }, { "epoch": 3.95, "learning_rate": 4e-05, "loss": 4.7641, "step": 40 }, { "epoch": 3.95, "eval_accuracy": 0.21813953488372093, "eval_loss": 4.053351879119873, "eval_runtime": 5.9761, "eval_samples_per_second": 359.764, "eval_steps_per_second": 0.837, "step": 40 }, { "epoch": 4.95, "learning_rate": 5e-05, "loss": 4.0761, "step": 50 }, { "epoch": 4.95, "eval_accuracy": 0.34325581395348836, "eval_loss": 3.1956703662872314, "eval_runtime": 6.1144, "eval_samples_per_second": 351.631, "eval_steps_per_second": 0.818, "step": 50 }, { "epoch": 5.95, "learning_rate": 4.888888888888889e-05, "loss": 3.3846, "step": 60 }, { "epoch": 5.95, "eval_accuracy": 0.4786046511627907, "eval_loss": 2.493035078048706, "eval_runtime": 5.9735, "eval_samples_per_second": 359.926, "eval_steps_per_second": 0.837, "step": 60 }, { "epoch": 6.95, "learning_rate": 4.7777777777777784e-05, "loss": 2.8319, "step": 70 }, { "epoch": 6.95, "eval_accuracy": 0.5623255813953488, "eval_loss": 2.0251505374908447, "eval_runtime": 5.9942, "eval_samples_per_second": 358.678, "eval_steps_per_second": 0.834, "step": 70 }, { "epoch": 7.95, "learning_rate": 4.666666666666667e-05, "loss": 2.4358, "step": 80 }, { "epoch": 7.95, "eval_accuracy": 0.6334883720930232, "eval_loss": 1.6915886402130127, "eval_runtime": 5.9886, "eval_samples_per_second": 359.016, "eval_steps_per_second": 0.835, "step": 80 }, { "epoch": 8.95, "learning_rate": 4.555555555555556e-05, "loss": 2.1433, "step": 90 }, { "epoch": 8.95, "eval_accuracy": 0.6637209302325582, "eval_loss": 1.456493616104126, "eval_runtime": 6.0018, "eval_samples_per_second": 358.223, "eval_steps_per_second": 0.833, "step": 90 }, { "epoch": 9.95, "learning_rate": 4.4444444444444447e-05, "loss": 1.9315, "step": 100 }, { "epoch": 9.95, "eval_accuracy": 0.7013953488372093, "eval_loss": 1.2676024436950684, "eval_runtime": 5.9984, "eval_samples_per_second": 358.426, "eval_steps_per_second": 0.834, "step": 100 }, { "epoch": 10.95, "learning_rate": 4.3333333333333334e-05, "loss": 1.7746, "step": 110 }, { "epoch": 10.95, "eval_accuracy": 0.7237209302325581, "eval_loss": 1.1530412435531616, "eval_runtime": 5.9931, "eval_samples_per_second": 358.749, "eval_steps_per_second": 0.834, "step": 110 }, { "epoch": 11.95, "learning_rate": 4.222222222222222e-05, "loss": 1.6467, "step": 120 }, { "epoch": 11.95, "eval_accuracy": 0.74, "eval_loss": 1.0685006380081177, "eval_runtime": 6.2755, "eval_samples_per_second": 342.602, "eval_steps_per_second": 0.797, "step": 120 }, { "epoch": 12.95, "learning_rate": 4.111111111111111e-05, "loss": 1.546, "step": 130 }, { "epoch": 12.95, "eval_accuracy": 0.7572093023255814, "eval_loss": 0.999431312084198, "eval_runtime": 5.987, "eval_samples_per_second": 359.114, "eval_steps_per_second": 0.835, "step": 130 }, { "epoch": 13.95, "learning_rate": 4e-05, "loss": 1.4734, "step": 140 }, { "epoch": 13.95, "eval_accuracy": 0.7660465116279069, "eval_loss": 0.946732223033905, "eval_runtime": 5.9853, "eval_samples_per_second": 359.213, "eval_steps_per_second": 0.835, "step": 140 }, { "epoch": 14.95, "learning_rate": 3.888888888888889e-05, "loss": 1.4163, "step": 150 }, { "epoch": 14.95, "eval_accuracy": 0.7753488372093024, "eval_loss": 0.9061232209205627, "eval_runtime": 5.9975, "eval_samples_per_second": 358.485, "eval_steps_per_second": 0.834, "step": 150 }, { "epoch": 15.95, "learning_rate": 3.777777777777778e-05, "loss": 1.3593, "step": 160 }, { "epoch": 15.95, "eval_accuracy": 0.7832558139534884, "eval_loss": 0.8717327117919922, "eval_runtime": 5.9972, "eval_samples_per_second": 358.502, "eval_steps_per_second": 0.834, "step": 160 }, { "epoch": 16.95, "learning_rate": 3.6666666666666666e-05, "loss": 1.3129, "step": 170 }, { "epoch": 16.95, "eval_accuracy": 0.7902325581395349, "eval_loss": 0.8438239097595215, "eval_runtime": 5.9855, "eval_samples_per_second": 359.203, "eval_steps_per_second": 0.835, "step": 170 }, { "epoch": 17.95, "learning_rate": 3.555555555555556e-05, "loss": 1.2843, "step": 180 }, { "epoch": 17.95, "eval_accuracy": 0.7986046511627907, "eval_loss": 0.8182681798934937, "eval_runtime": 5.985, "eval_samples_per_second": 359.23, "eval_steps_per_second": 0.835, "step": 180 }, { "epoch": 18.95, "learning_rate": 3.444444444444445e-05, "loss": 1.2527, "step": 190 }, { "epoch": 18.95, "eval_accuracy": 0.804186046511628, "eval_loss": 0.8000912070274353, "eval_runtime": 5.9829, "eval_samples_per_second": 359.357, "eval_steps_per_second": 0.836, "step": 190 }, { "epoch": 19.95, "learning_rate": 3.3333333333333335e-05, "loss": 1.2127, "step": 200 }, { "epoch": 19.95, "eval_accuracy": 0.804186046511628, "eval_loss": 0.7860467433929443, "eval_runtime": 5.9859, "eval_samples_per_second": 359.177, "eval_steps_per_second": 0.835, "step": 200 }, { "epoch": 20.95, "learning_rate": 3.222222222222223e-05, "loss": 1.1854, "step": 210 }, { "epoch": 20.95, "eval_accuracy": 0.8093023255813954, "eval_loss": 0.7675830125808716, "eval_runtime": 6.0054, "eval_samples_per_second": 358.008, "eval_steps_per_second": 0.833, "step": 210 }, { "epoch": 21.95, "learning_rate": 3.111111111111111e-05, "loss": 1.1574, "step": 220 }, { "epoch": 21.95, "eval_accuracy": 0.8079069767441861, "eval_loss": 0.7555623650550842, "eval_runtime": 5.9751, "eval_samples_per_second": 359.829, "eval_steps_per_second": 0.837, "step": 220 }, { "epoch": 22.95, "learning_rate": 3e-05, "loss": 1.1283, "step": 230 }, { "epoch": 22.95, "eval_accuracy": 0.8130232558139535, "eval_loss": 0.7396910190582275, "eval_runtime": 5.9779, "eval_samples_per_second": 359.658, "eval_steps_per_second": 0.836, "step": 230 }, { "epoch": 23.95, "learning_rate": 2.8888888888888888e-05, "loss": 1.1302, "step": 240 }, { "epoch": 23.95, "eval_accuracy": 0.8111627906976744, "eval_loss": 0.7319375276565552, "eval_runtime": 5.9855, "eval_samples_per_second": 359.199, "eval_steps_per_second": 0.835, "step": 240 }, { "epoch": 24.95, "learning_rate": 2.777777777777778e-05, "loss": 1.1032, "step": 250 }, { "epoch": 24.95, "eval_accuracy": 0.8176744186046512, "eval_loss": 0.718932569026947, "eval_runtime": 5.977, "eval_samples_per_second": 359.71, "eval_steps_per_second": 0.837, "step": 250 }, { "epoch": 25.95, "learning_rate": 2.6666666666666667e-05, "loss": 1.0891, "step": 260 }, { "epoch": 25.95, "eval_accuracy": 0.82, "eval_loss": 0.7135240435600281, "eval_runtime": 5.9814, "eval_samples_per_second": 359.449, "eval_steps_per_second": 0.836, "step": 260 }, { "epoch": 26.95, "learning_rate": 2.5555555555555554e-05, "loss": 1.0738, "step": 270 }, { "epoch": 26.95, "eval_accuracy": 0.8195348837209302, "eval_loss": 0.7008457779884338, "eval_runtime": 5.9894, "eval_samples_per_second": 358.966, "eval_steps_per_second": 0.835, "step": 270 }, { "epoch": 27.95, "learning_rate": 2.4444444444444445e-05, "loss": 1.0665, "step": 280 }, { "epoch": 27.95, "eval_accuracy": 0.8218604651162791, "eval_loss": 0.6941251754760742, "eval_runtime": 5.9997, "eval_samples_per_second": 358.352, "eval_steps_per_second": 0.833, "step": 280 }, { "epoch": 28.95, "learning_rate": 2.3333333333333336e-05, "loss": 1.0354, "step": 290 }, { "epoch": 28.95, "eval_accuracy": 0.8223255813953488, "eval_loss": 0.690199613571167, "eval_runtime": 5.992, "eval_samples_per_second": 358.814, "eval_steps_per_second": 0.834, "step": 290 }, { "epoch": 29.95, "learning_rate": 2.2222222222222223e-05, "loss": 1.0404, "step": 300 }, { "epoch": 29.95, "eval_accuracy": 0.8237209302325581, "eval_loss": 0.684846043586731, "eval_runtime": 5.9904, "eval_samples_per_second": 358.908, "eval_steps_per_second": 0.835, "step": 300 }, { "epoch": 30.95, "learning_rate": 2.111111111111111e-05, "loss": 1.0251, "step": 310 }, { "epoch": 30.95, "eval_accuracy": 0.8218604651162791, "eval_loss": 0.678667426109314, "eval_runtime": 5.9867, "eval_samples_per_second": 359.129, "eval_steps_per_second": 0.835, "step": 310 }, { "epoch": 31.95, "learning_rate": 2e-05, "loss": 1.0127, "step": 320 }, { "epoch": 31.95, "eval_accuracy": 0.8246511627906977, "eval_loss": 0.673900306224823, "eval_runtime": 5.9929, "eval_samples_per_second": 358.756, "eval_steps_per_second": 0.834, "step": 320 }, { "epoch": 32.95, "learning_rate": 1.888888888888889e-05, "loss": 1.0023, "step": 330 }, { "epoch": 32.95, "eval_accuracy": 0.8255813953488372, "eval_loss": 0.6712960004806519, "eval_runtime": 5.9994, "eval_samples_per_second": 358.368, "eval_steps_per_second": 0.833, "step": 330 }, { "epoch": 33.95, "learning_rate": 1.777777777777778e-05, "loss": 1.0012, "step": 340 }, { "epoch": 33.95, "eval_accuracy": 0.8246511627906977, "eval_loss": 0.6670580506324768, "eval_runtime": 6.0038, "eval_samples_per_second": 358.108, "eval_steps_per_second": 0.833, "step": 340 }, { "epoch": 34.95, "learning_rate": 1.6666666666666667e-05, "loss": 0.9835, "step": 350 }, { "epoch": 34.95, "eval_accuracy": 0.8251162790697675, "eval_loss": 0.6612110733985901, "eval_runtime": 5.9862, "eval_samples_per_second": 359.158, "eval_steps_per_second": 0.835, "step": 350 }, { "epoch": 35.95, "learning_rate": 1.5555555555555555e-05, "loss": 0.982, "step": 360 }, { "epoch": 35.95, "eval_accuracy": 0.8251162790697675, "eval_loss": 0.6587132215499878, "eval_runtime": 6.0051, "eval_samples_per_second": 358.031, "eval_steps_per_second": 0.833, "step": 360 }, { "epoch": 36.95, "learning_rate": 1.4444444444444444e-05, "loss": 0.9849, "step": 370 }, { "epoch": 36.95, "eval_accuracy": 0.8251162790697675, "eval_loss": 0.6563166975975037, "eval_runtime": 6.0073, "eval_samples_per_second": 357.899, "eval_steps_per_second": 0.832, "step": 370 }, { "epoch": 37.95, "learning_rate": 1.3333333333333333e-05, "loss": 0.9645, "step": 380 }, { "epoch": 37.95, "eval_accuracy": 0.8232558139534883, "eval_loss": 0.652894914150238, "eval_runtime": 6.0024, "eval_samples_per_second": 358.193, "eval_steps_per_second": 0.833, "step": 380 }, { "epoch": 38.95, "learning_rate": 1.2222222222222222e-05, "loss": 0.947, "step": 390 }, { "epoch": 38.95, "eval_accuracy": 0.8283720930232558, "eval_loss": 0.6512119770050049, "eval_runtime": 5.9854, "eval_samples_per_second": 359.208, "eval_steps_per_second": 0.835, "step": 390 }, { "epoch": 39.95, "learning_rate": 1.1111111111111112e-05, "loss": 0.9563, "step": 400 }, { "epoch": 39.95, "eval_accuracy": 0.8265116279069767, "eval_loss": 0.6485108733177185, "eval_runtime": 5.9994, "eval_samples_per_second": 358.371, "eval_steps_per_second": 0.833, "step": 400 }, { "epoch": 40.95, "learning_rate": 1e-05, "loss": 0.9619, "step": 410 }, { "epoch": 40.95, "eval_accuracy": 0.826046511627907, "eval_loss": 0.6457317471504211, "eval_runtime": 5.9978, "eval_samples_per_second": 358.466, "eval_steps_per_second": 0.834, "step": 410 }, { "epoch": 41.95, "learning_rate": 8.88888888888889e-06, "loss": 0.9399, "step": 420 }, { "epoch": 41.95, "eval_accuracy": 0.8283720930232558, "eval_loss": 0.6446535587310791, "eval_runtime": 5.9698, "eval_samples_per_second": 360.144, "eval_steps_per_second": 0.838, "step": 420 }, { "epoch": 42.95, "learning_rate": 7.777777777777777e-06, "loss": 0.9423, "step": 430 }, { "epoch": 42.95, "eval_accuracy": 0.8288372093023256, "eval_loss": 0.6421455144882202, "eval_runtime": 5.9718, "eval_samples_per_second": 360.027, "eval_steps_per_second": 0.837, "step": 430 }, { "epoch": 43.95, "learning_rate": 6.666666666666667e-06, "loss": 0.9482, "step": 440 }, { "epoch": 43.95, "eval_accuracy": 0.8283720930232558, "eval_loss": 0.6426512002944946, "eval_runtime": 6.012, "eval_samples_per_second": 357.619, "eval_steps_per_second": 0.832, "step": 440 }, { "epoch": 44.95, "learning_rate": 5.555555555555556e-06, "loss": 0.9315, "step": 450 }, { "epoch": 44.95, "eval_accuracy": 0.8297674418604651, "eval_loss": 0.6420783400535583, "eval_runtime": 6.2892, "eval_samples_per_second": 341.856, "eval_steps_per_second": 0.795, "step": 450 }, { "epoch": 45.95, "learning_rate": 4.444444444444445e-06, "loss": 0.9411, "step": 460 }, { "epoch": 45.95, "eval_accuracy": 0.8293023255813954, "eval_loss": 0.6401400566101074, "eval_runtime": 5.9778, "eval_samples_per_second": 359.664, "eval_steps_per_second": 0.836, "step": 460 }, { "epoch": 46.95, "learning_rate": 3.3333333333333333e-06, "loss": 0.9249, "step": 470 }, { "epoch": 46.95, "eval_accuracy": 0.8297674418604651, "eval_loss": 0.639735758304596, "eval_runtime": 5.9937, "eval_samples_per_second": 358.711, "eval_steps_per_second": 0.834, "step": 470 }, { "epoch": 47.95, "learning_rate": 2.2222222222222225e-06, "loss": 0.9361, "step": 480 }, { "epoch": 47.95, "eval_accuracy": 0.8293023255813954, "eval_loss": 0.6406731605529785, "eval_runtime": 5.9924, "eval_samples_per_second": 358.789, "eval_steps_per_second": 0.834, "step": 480 }, { "epoch": 48.95, "learning_rate": 1.1111111111111112e-06, "loss": 0.952, "step": 490 }, { "epoch": 48.95, "eval_accuracy": 0.8297674418604651, "eval_loss": 0.6389557719230652, "eval_runtime": 5.9868, "eval_samples_per_second": 359.122, "eval_steps_per_second": 0.835, "step": 490 }, { "epoch": 49.95, "learning_rate": 0.0, "loss": 0.9358, "step": 500 }, { "epoch": 49.95, "eval_accuracy": 0.8297674418604651, "eval_loss": 0.6392757296562195, "eval_runtime": 6.0005, "eval_samples_per_second": 358.304, "eval_steps_per_second": 0.833, "step": 500 }, { "epoch": 49.95, "step": 500, "total_flos": 1.0940562332139848e+19, "train_loss": 1.65314315032959, "train_runtime": 3746.3389, "train_samples_per_second": 286.947, "train_steps_per_second": 0.133 } ], "max_steps": 500, "num_train_epochs": 50, "total_flos": 1.0940562332139848e+19, "trial_name": null, "trial_params": null }