{ "best_metric": 0.9130434782608695, "best_model_checkpoint": "vit-base-patch16-224-ve-U13b-80RX3\\checkpoint-875", "epoch": 39.61165048543689, "eval_steps": 500, "global_step": 2040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "learning_rate": 4.647058823529412e-06, "loss": 1.3862, "step": 10 }, { "epoch": 0.39, "learning_rate": 9.294117647058824e-06, "loss": 1.3855, "step": 20 }, { "epoch": 0.58, "learning_rate": 1.3941176470588236e-05, "loss": 1.3804, "step": 30 }, { "epoch": 0.78, "learning_rate": 1.8588235294117647e-05, "loss": 1.3673, "step": 40 }, { "epoch": 0.97, "learning_rate": 2.3235294117647057e-05, "loss": 1.33, "step": 50 }, { "epoch": 0.99, "eval_accuracy": 0.34782608695652173, "eval_loss": 1.3133331537246704, "eval_runtime": 1.067, "eval_samples_per_second": 43.112, "eval_steps_per_second": 5.623, "step": 51 }, { "epoch": 1.17, "learning_rate": 2.7882352941176473e-05, "loss": 1.2898, "step": 60 }, { "epoch": 1.36, "learning_rate": 3.252941176470588e-05, "loss": 1.213, "step": 70 }, { "epoch": 1.55, "learning_rate": 3.7176470588235295e-05, "loss": 1.1789, "step": 80 }, { "epoch": 1.75, "learning_rate": 4.182352941176471e-05, "loss": 1.0432, "step": 90 }, { "epoch": 1.94, "learning_rate": 4.6470588235294114e-05, "loss": 1.0288, "step": 100 }, { "epoch": 2.0, "eval_accuracy": 0.5652173913043478, "eval_loss": 1.0045453310012817, "eval_runtime": 0.9508, "eval_samples_per_second": 48.379, "eval_steps_per_second": 6.31, "step": 103 }, { "epoch": 2.14, "learning_rate": 4.720433436532508e-05, "loss": 0.9473, "step": 110 }, { "epoch": 2.33, "learning_rate": 4.695975232198143e-05, "loss": 0.9482, "step": 120 }, { "epoch": 2.52, "learning_rate": 4.671517027863777e-05, "loss": 0.8323, "step": 130 }, { "epoch": 2.72, "learning_rate": 4.6470588235294114e-05, "loss": 0.7445, "step": 140 }, { "epoch": 2.91, "learning_rate": 4.6226006191950463e-05, "loss": 0.7322, "step": 150 }, { "epoch": 2.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.7308945059776306, "eval_runtime": 0.826, "eval_samples_per_second": 55.688, "eval_steps_per_second": 7.264, "step": 154 }, { "epoch": 3.11, "learning_rate": 4.5981424148606813e-05, "loss": 0.6374, "step": 160 }, { "epoch": 3.3, "learning_rate": 4.5736842105263157e-05, "loss": 0.5266, "step": 170 }, { "epoch": 3.5, "learning_rate": 4.5492260061919506e-05, "loss": 0.5564, "step": 180 }, { "epoch": 3.69, "learning_rate": 4.524767801857585e-05, "loss": 0.537, "step": 190 }, { "epoch": 3.88, "learning_rate": 4.50030959752322e-05, "loss": 0.5476, "step": 200 }, { "epoch": 4.0, "eval_accuracy": 0.782608695652174, "eval_loss": 0.6316179633140564, "eval_runtime": 0.8212, "eval_samples_per_second": 56.017, "eval_steps_per_second": 7.307, "step": 206 }, { "epoch": 4.08, "learning_rate": 4.475851393188855e-05, "loss": 0.405, "step": 210 }, { "epoch": 4.27, "learning_rate": 4.451393188854489e-05, "loss": 0.3837, "step": 220 }, { "epoch": 4.47, "learning_rate": 4.4269349845201236e-05, "loss": 0.4488, "step": 230 }, { "epoch": 4.66, "learning_rate": 4.4024767801857586e-05, "loss": 0.3847, "step": 240 }, { "epoch": 4.85, "learning_rate": 4.3780185758513935e-05, "loss": 0.2863, "step": 250 }, { "epoch": 4.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.5598096251487732, "eval_runtime": 0.8302, "eval_samples_per_second": 55.408, "eval_steps_per_second": 7.227, "step": 257 }, { "epoch": 5.05, "learning_rate": 4.353560371517028e-05, "loss": 0.3397, "step": 260 }, { "epoch": 5.24, "learning_rate": 4.329102167182663e-05, "loss": 0.2941, "step": 270 }, { "epoch": 5.44, "learning_rate": 4.304643962848297e-05, "loss": 0.288, "step": 280 }, { "epoch": 5.63, "learning_rate": 4.2801857585139315e-05, "loss": 0.3423, "step": 290 }, { "epoch": 5.83, "learning_rate": 4.2557275541795665e-05, "loss": 0.3149, "step": 300 }, { "epoch": 6.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.5427684783935547, "eval_runtime": 0.8152, "eval_samples_per_second": 56.427, "eval_steps_per_second": 7.36, "step": 309 }, { "epoch": 6.02, "learning_rate": 4.2312693498452015e-05, "loss": 0.2982, "step": 310 }, { "epoch": 6.21, "learning_rate": 4.206811145510836e-05, "loss": 0.2823, "step": 320 }, { "epoch": 6.41, "learning_rate": 4.182352941176471e-05, "loss": 0.2447, "step": 330 }, { "epoch": 6.6, "learning_rate": 4.157894736842105e-05, "loss": 0.3232, "step": 340 }, { "epoch": 6.8, "learning_rate": 4.13343653250774e-05, "loss": 0.3179, "step": 350 }, { "epoch": 6.99, "learning_rate": 4.1089783281733744e-05, "loss": 0.1489, "step": 360 }, { "epoch": 6.99, "eval_accuracy": 0.8695652173913043, "eval_loss": 0.5149533152580261, "eval_runtime": 0.7717, "eval_samples_per_second": 59.609, "eval_steps_per_second": 7.775, "step": 360 }, { "epoch": 7.18, "learning_rate": 4.0845201238390094e-05, "loss": 0.1946, "step": 370 }, { "epoch": 7.38, "learning_rate": 4.060061919504644e-05, "loss": 0.1965, "step": 380 }, { "epoch": 7.57, "learning_rate": 4.035603715170279e-05, "loss": 0.2141, "step": 390 }, { "epoch": 7.77, "learning_rate": 4.011145510835914e-05, "loss": 0.3889, "step": 400 }, { "epoch": 7.96, "learning_rate": 3.986687306501548e-05, "loss": 0.1134, "step": 410 }, { "epoch": 8.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.45851370692253113, "eval_runtime": 0.8807, "eval_samples_per_second": 52.231, "eval_steps_per_second": 6.813, "step": 412 }, { "epoch": 8.16, "learning_rate": 3.962229102167183e-05, "loss": 0.1778, "step": 420 }, { "epoch": 8.35, "learning_rate": 3.937770897832817e-05, "loss": 0.1464, "step": 430 }, { "epoch": 8.54, "learning_rate": 3.9133126934984516e-05, "loss": 0.1724, "step": 440 }, { "epoch": 8.74, "learning_rate": 3.8888544891640866e-05, "loss": 0.1663, "step": 450 }, { "epoch": 8.93, "learning_rate": 3.8643962848297216e-05, "loss": 0.1613, "step": 460 }, { "epoch": 8.99, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.6283782720565796, "eval_runtime": 0.8873, "eval_samples_per_second": 51.841, "eval_steps_per_second": 6.762, "step": 463 }, { "epoch": 9.13, "learning_rate": 3.839938080495356e-05, "loss": 0.1331, "step": 470 }, { "epoch": 9.32, "learning_rate": 3.815479876160991e-05, "loss": 0.1595, "step": 480 }, { "epoch": 9.51, "learning_rate": 3.791021671826625e-05, "loss": 0.2071, "step": 490 }, { "epoch": 9.71, "learning_rate": 3.76656346749226e-05, "loss": 0.11, "step": 500 }, { "epoch": 9.9, "learning_rate": 3.7421052631578945e-05, "loss": 0.1855, "step": 510 }, { "epoch": 10.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.5985141396522522, "eval_runtime": 0.8537, "eval_samples_per_second": 53.882, "eval_steps_per_second": 7.028, "step": 515 }, { "epoch": 10.1, "learning_rate": 3.7176470588235295e-05, "loss": 0.0823, "step": 520 }, { "epoch": 10.29, "learning_rate": 3.693188854489164e-05, "loss": 0.1198, "step": 530 }, { "epoch": 10.49, "learning_rate": 3.668730650154799e-05, "loss": 0.1222, "step": 540 }, { "epoch": 10.68, "learning_rate": 3.644272445820434e-05, "loss": 0.1218, "step": 550 }, { "epoch": 10.87, "learning_rate": 3.619814241486068e-05, "loss": 0.1908, "step": 560 }, { "epoch": 10.99, "eval_accuracy": 0.7391304347826086, "eval_loss": 1.0336034297943115, "eval_runtime": 0.844, "eval_samples_per_second": 54.503, "eval_steps_per_second": 7.109, "step": 566 }, { "epoch": 11.07, "learning_rate": 3.5953560371517024e-05, "loss": 0.201, "step": 570 }, { "epoch": 11.26, "learning_rate": 3.5708978328173374e-05, "loss": 0.1526, "step": 580 }, { "epoch": 11.46, "learning_rate": 3.5464396284829724e-05, "loss": 0.0184, "step": 590 }, { "epoch": 11.65, "learning_rate": 3.521981424148607e-05, "loss": 0.0832, "step": 600 }, { "epoch": 11.84, "learning_rate": 3.497523219814242e-05, "loss": 0.2293, "step": 610 }, { "epoch": 12.0, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.7745507955551147, "eval_runtime": 0.8319, "eval_samples_per_second": 55.297, "eval_steps_per_second": 7.213, "step": 618 }, { "epoch": 12.04, "learning_rate": 3.473065015479876e-05, "loss": 0.1271, "step": 620 }, { "epoch": 12.23, "learning_rate": 3.448606811145511e-05, "loss": 0.1235, "step": 630 }, { "epoch": 12.43, "learning_rate": 3.424148606811146e-05, "loss": 0.1258, "step": 640 }, { "epoch": 12.62, "learning_rate": 3.39969040247678e-05, "loss": 0.0889, "step": 650 }, { "epoch": 12.82, "learning_rate": 3.3752321981424146e-05, "loss": 0.1414, "step": 660 }, { "epoch": 12.99, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.6517351865768433, "eval_runtime": 0.9004, "eval_samples_per_second": 51.091, "eval_steps_per_second": 6.664, "step": 669 }, { "epoch": 13.01, "learning_rate": 3.3507739938080496e-05, "loss": 0.0693, "step": 670 }, { "epoch": 13.2, "learning_rate": 3.326315789473684e-05, "loss": 0.0435, "step": 680 }, { "epoch": 13.4, "learning_rate": 3.301857585139319e-05, "loss": 0.2878, "step": 690 }, { "epoch": 13.59, "learning_rate": 3.277399380804954e-05, "loss": 0.1721, "step": 700 }, { "epoch": 13.79, "learning_rate": 3.252941176470588e-05, "loss": 0.1421, "step": 710 }, { "epoch": 13.98, "learning_rate": 3.2284829721362225e-05, "loss": 0.0877, "step": 720 }, { "epoch": 14.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.563931941986084, "eval_runtime": 0.8391, "eval_samples_per_second": 54.824, "eval_steps_per_second": 7.151, "step": 721 }, { "epoch": 14.17, "learning_rate": 3.2040247678018575e-05, "loss": 0.0886, "step": 730 }, { "epoch": 14.37, "learning_rate": 3.1795665634674925e-05, "loss": 0.1254, "step": 740 }, { "epoch": 14.56, "learning_rate": 3.155108359133127e-05, "loss": 0.0781, "step": 750 }, { "epoch": 14.76, "learning_rate": 3.130650154798762e-05, "loss": 0.1242, "step": 760 }, { "epoch": 14.95, "learning_rate": 3.106191950464396e-05, "loss": 0.1302, "step": 770 }, { "epoch": 14.99, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.7687188982963562, "eval_runtime": 0.8747, "eval_samples_per_second": 52.587, "eval_steps_per_second": 6.859, "step": 772 }, { "epoch": 15.15, "learning_rate": 3.0817337461300304e-05, "loss": 0.0822, "step": 780 }, { "epoch": 15.34, "learning_rate": 3.057275541795666e-05, "loss": 0.0589, "step": 790 }, { "epoch": 15.53, "learning_rate": 3.0328173374613004e-05, "loss": 0.0808, "step": 800 }, { "epoch": 15.73, "learning_rate": 3.008359133126935e-05, "loss": 0.0589, "step": 810 }, { "epoch": 15.92, "learning_rate": 2.9839009287925697e-05, "loss": 0.047, "step": 820 }, { "epoch": 16.0, "eval_accuracy": 0.8695652173913043, "eval_loss": 0.6772664189338684, "eval_runtime": 0.827, "eval_samples_per_second": 55.621, "eval_steps_per_second": 7.255, "step": 824 }, { "epoch": 16.12, "learning_rate": 2.959442724458204e-05, "loss": 0.1881, "step": 830 }, { "epoch": 16.31, "learning_rate": 2.9349845201238394e-05, "loss": 0.0418, "step": 840 }, { "epoch": 16.5, "learning_rate": 2.9105263157894737e-05, "loss": 0.0664, "step": 850 }, { "epoch": 16.7, "learning_rate": 2.8860681114551083e-05, "loss": 0.1497, "step": 860 }, { "epoch": 16.89, "learning_rate": 2.861609907120743e-05, "loss": 0.1045, "step": 870 }, { "epoch": 16.99, "eval_accuracy": 0.9130434782608695, "eval_loss": 0.4344385862350464, "eval_runtime": 0.8479, "eval_samples_per_second": 54.255, "eval_steps_per_second": 7.077, "step": 875 }, { "epoch": 17.09, "learning_rate": 2.8371517027863776e-05, "loss": 0.1355, "step": 880 }, { "epoch": 17.28, "learning_rate": 2.8126934984520126e-05, "loss": 0.1205, "step": 890 }, { "epoch": 17.48, "learning_rate": 2.7882352941176473e-05, "loss": 0.0984, "step": 900 }, { "epoch": 17.67, "learning_rate": 2.7637770897832816e-05, "loss": 0.1224, "step": 910 }, { "epoch": 17.86, "learning_rate": 2.7393188854489163e-05, "loss": 0.0751, "step": 920 }, { "epoch": 18.0, "eval_accuracy": 0.7391304347826086, "eval_loss": 1.0160393714904785, "eval_runtime": 0.8692, "eval_samples_per_second": 52.922, "eval_steps_per_second": 6.903, "step": 927 }, { "epoch": 18.06, "learning_rate": 2.714860681114551e-05, "loss": 0.0833, "step": 930 }, { "epoch": 18.25, "learning_rate": 2.690402476780186e-05, "loss": 0.0335, "step": 940 }, { "epoch": 18.45, "learning_rate": 2.6659442724458205e-05, "loss": 0.0702, "step": 950 }, { "epoch": 18.64, "learning_rate": 2.6414860681114552e-05, "loss": 0.0391, "step": 960 }, { "epoch": 18.83, "learning_rate": 2.6170278637770895e-05, "loss": 0.1141, "step": 970 }, { "epoch": 18.99, "eval_accuracy": 0.8695652173913043, "eval_loss": 0.6642520427703857, "eval_runtime": 0.9401, "eval_samples_per_second": 48.929, "eval_steps_per_second": 6.382, "step": 978 }, { "epoch": 19.03, "learning_rate": 2.592569659442724e-05, "loss": 0.0581, "step": 980 }, { "epoch": 19.22, "learning_rate": 2.568111455108359e-05, "loss": 0.0739, "step": 990 }, { "epoch": 19.42, "learning_rate": 2.5436532507739938e-05, "loss": 0.0566, "step": 1000 }, { "epoch": 19.61, "learning_rate": 2.5191950464396285e-05, "loss": 0.2267, "step": 1010 }, { "epoch": 19.81, "learning_rate": 2.494736842105263e-05, "loss": 0.0873, "step": 1020 }, { "epoch": 20.0, "learning_rate": 2.4702786377708978e-05, "loss": 0.1756, "step": 1030 }, { "epoch": 20.0, "eval_accuracy": 0.8913043478260869, "eval_loss": 0.5581848621368408, "eval_runtime": 0.8076, "eval_samples_per_second": 56.958, "eval_steps_per_second": 7.429, "step": 1030 }, { "epoch": 20.19, "learning_rate": 2.4458204334365328e-05, "loss": 0.0867, "step": 1040 }, { "epoch": 20.39, "learning_rate": 2.4213622291021674e-05, "loss": 0.0911, "step": 1050 }, { "epoch": 20.58, "learning_rate": 2.3969040247678017e-05, "loss": 0.1337, "step": 1060 }, { "epoch": 20.78, "learning_rate": 2.3724458204334364e-05, "loss": 0.1217, "step": 1070 }, { "epoch": 20.97, "learning_rate": 2.3479876160990714e-05, "loss": 0.1212, "step": 1080 }, { "epoch": 20.99, "eval_accuracy": 0.8913043478260869, "eval_loss": 0.5640941858291626, "eval_runtime": 0.8177, "eval_samples_per_second": 56.253, "eval_steps_per_second": 7.337, "step": 1081 }, { "epoch": 21.17, "learning_rate": 2.3235294117647057e-05, "loss": 0.1123, "step": 1090 }, { "epoch": 21.36, "learning_rate": 2.2990712074303407e-05, "loss": 0.063, "step": 1100 }, { "epoch": 21.55, "learning_rate": 2.2746130030959753e-05, "loss": 0.0782, "step": 1110 }, { "epoch": 21.75, "learning_rate": 2.25015479876161e-05, "loss": 0.0588, "step": 1120 }, { "epoch": 21.94, "learning_rate": 2.2256965944272446e-05, "loss": 0.0903, "step": 1130 }, { "epoch": 22.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.698965847492218, "eval_runtime": 0.827, "eval_samples_per_second": 55.62, "eval_steps_per_second": 7.255, "step": 1133 }, { "epoch": 22.14, "learning_rate": 2.2012383900928793e-05, "loss": 0.0266, "step": 1140 }, { "epoch": 22.33, "learning_rate": 2.176780185758514e-05, "loss": 0.0578, "step": 1150 }, { "epoch": 22.52, "learning_rate": 2.1523219814241486e-05, "loss": 0.0606, "step": 1160 }, { "epoch": 22.72, "learning_rate": 2.1278637770897832e-05, "loss": 0.0809, "step": 1170 }, { "epoch": 22.91, "learning_rate": 2.103405572755418e-05, "loss": 0.0693, "step": 1180 }, { "epoch": 22.99, "eval_accuracy": 0.8913043478260869, "eval_loss": 0.5548133254051208, "eval_runtime": 0.8892, "eval_samples_per_second": 51.732, "eval_steps_per_second": 6.748, "step": 1184 }, { "epoch": 23.11, "learning_rate": 2.0789473684210525e-05, "loss": 0.0246, "step": 1190 }, { "epoch": 23.3, "learning_rate": 2.0544891640866872e-05, "loss": 0.0926, "step": 1200 }, { "epoch": 23.5, "learning_rate": 2.030030959752322e-05, "loss": 0.0344, "step": 1210 }, { "epoch": 23.69, "learning_rate": 2.005572755417957e-05, "loss": 0.0526, "step": 1220 }, { "epoch": 23.88, "learning_rate": 1.9811145510835915e-05, "loss": 0.0048, "step": 1230 }, { "epoch": 24.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.6957914233207703, "eval_runtime": 0.8992, "eval_samples_per_second": 51.155, "eval_steps_per_second": 6.672, "step": 1236 }, { "epoch": 24.08, "learning_rate": 1.9566563467492258e-05, "loss": 0.0412, "step": 1240 }, { "epoch": 24.27, "learning_rate": 1.9321981424148608e-05, "loss": 0.1009, "step": 1250 }, { "epoch": 24.47, "learning_rate": 1.9077399380804954e-05, "loss": 0.1074, "step": 1260 }, { "epoch": 24.66, "learning_rate": 1.88328173374613e-05, "loss": 0.1063, "step": 1270 }, { "epoch": 24.85, "learning_rate": 1.8588235294117647e-05, "loss": 0.0785, "step": 1280 }, { "epoch": 24.99, "eval_accuracy": 0.8043478260869565, "eval_loss": 0.7886456251144409, "eval_runtime": 0.7742, "eval_samples_per_second": 59.417, "eval_steps_per_second": 7.75, "step": 1287 }, { "epoch": 25.05, "learning_rate": 1.8343653250773994e-05, "loss": 0.0349, "step": 1290 }, { "epoch": 25.24, "learning_rate": 1.809907120743034e-05, "loss": 0.1957, "step": 1300 }, { "epoch": 25.44, "learning_rate": 1.7854489164086687e-05, "loss": 0.0344, "step": 1310 }, { "epoch": 25.63, "learning_rate": 1.7609907120743034e-05, "loss": 0.1233, "step": 1320 }, { "epoch": 25.83, "learning_rate": 1.736532507739938e-05, "loss": 0.0373, "step": 1330 }, { "epoch": 26.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.6345128417015076, "eval_runtime": 0.8942, "eval_samples_per_second": 51.442, "eval_steps_per_second": 6.71, "step": 1339 }, { "epoch": 26.02, "learning_rate": 1.712074303405573e-05, "loss": 0.0051, "step": 1340 }, { "epoch": 26.21, "learning_rate": 1.6876160990712073e-05, "loss": 0.1126, "step": 1350 }, { "epoch": 26.41, "learning_rate": 1.663157894736842e-05, "loss": 0.0579, "step": 1360 }, { "epoch": 26.6, "learning_rate": 1.638699690402477e-05, "loss": 0.0228, "step": 1370 }, { "epoch": 26.8, "learning_rate": 1.6142414860681113e-05, "loss": 0.0734, "step": 1380 }, { "epoch": 26.99, "learning_rate": 1.5897832817337463e-05, "loss": 0.0763, "step": 1390 }, { "epoch": 26.99, "eval_accuracy": 0.8695652173913043, "eval_loss": 0.6829540729522705, "eval_runtime": 0.8532, "eval_samples_per_second": 53.914, "eval_steps_per_second": 7.032, "step": 1390 }, { "epoch": 27.18, "learning_rate": 1.565325077399381e-05, "loss": 0.0457, "step": 1400 }, { "epoch": 27.38, "learning_rate": 1.5408668730650152e-05, "loss": 0.1175, "step": 1410 }, { "epoch": 27.57, "learning_rate": 1.5164086687306502e-05, "loss": 0.0137, "step": 1420 }, { "epoch": 27.77, "learning_rate": 1.4919504643962849e-05, "loss": 0.0147, "step": 1430 }, { "epoch": 27.96, "learning_rate": 1.4674922600619197e-05, "loss": 0.0621, "step": 1440 }, { "epoch": 28.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.7293574213981628, "eval_runtime": 0.9042, "eval_samples_per_second": 50.873, "eval_steps_per_second": 6.636, "step": 1442 }, { "epoch": 28.16, "learning_rate": 1.4430340557275542e-05, "loss": 0.071, "step": 1450 }, { "epoch": 28.35, "learning_rate": 1.4185758513931888e-05, "loss": 0.0818, "step": 1460 }, { "epoch": 28.54, "learning_rate": 1.3941176470588236e-05, "loss": 0.009, "step": 1470 }, { "epoch": 28.74, "learning_rate": 1.3696594427244581e-05, "loss": 0.0417, "step": 1480 }, { "epoch": 28.93, "learning_rate": 1.345201238390093e-05, "loss": 0.0367, "step": 1490 }, { "epoch": 28.99, "eval_accuracy": 0.8695652173913043, "eval_loss": 0.6636242866516113, "eval_runtime": 0.8167, "eval_samples_per_second": 56.324, "eval_steps_per_second": 7.347, "step": 1493 }, { "epoch": 29.13, "learning_rate": 1.3207430340557276e-05, "loss": 0.0583, "step": 1500 }, { "epoch": 29.32, "learning_rate": 1.296284829721362e-05, "loss": 0.0745, "step": 1510 }, { "epoch": 29.51, "learning_rate": 1.2718266253869969e-05, "loss": 0.0502, "step": 1520 }, { "epoch": 29.71, "learning_rate": 1.2473684210526316e-05, "loss": 0.0385, "step": 1530 }, { "epoch": 29.9, "learning_rate": 1.2229102167182664e-05, "loss": 0.0124, "step": 1540 }, { "epoch": 30.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.8030955195426941, "eval_runtime": 0.7682, "eval_samples_per_second": 59.881, "eval_steps_per_second": 7.811, "step": 1545 }, { "epoch": 30.1, "learning_rate": 1.1984520123839009e-05, "loss": 0.0158, "step": 1550 }, { "epoch": 30.29, "learning_rate": 1.1739938080495357e-05, "loss": 0.0406, "step": 1560 }, { "epoch": 30.49, "learning_rate": 1.1495356037151703e-05, "loss": 0.1925, "step": 1570 }, { "epoch": 30.68, "learning_rate": 1.125077399380805e-05, "loss": 0.0512, "step": 1580 }, { "epoch": 30.87, "learning_rate": 1.1006191950464396e-05, "loss": 0.0759, "step": 1590 }, { "epoch": 30.99, "eval_accuracy": 0.8695652173913043, "eval_loss": 0.7075828313827515, "eval_runtime": 0.8152, "eval_samples_per_second": 56.429, "eval_steps_per_second": 7.36, "step": 1596 }, { "epoch": 31.07, "learning_rate": 1.0761609907120743e-05, "loss": 0.0585, "step": 1600 }, { "epoch": 31.26, "learning_rate": 1.051702786377709e-05, "loss": 0.0113, "step": 1610 }, { "epoch": 31.46, "learning_rate": 1.0272445820433436e-05, "loss": 0.0053, "step": 1620 }, { "epoch": 31.65, "learning_rate": 1.0027863777089784e-05, "loss": 0.0227, "step": 1630 }, { "epoch": 31.84, "learning_rate": 9.783281733746129e-06, "loss": 0.0786, "step": 1640 }, { "epoch": 32.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.8023592829704285, "eval_runtime": 0.8222, "eval_samples_per_second": 55.947, "eval_steps_per_second": 7.297, "step": 1648 }, { "epoch": 32.04, "learning_rate": 9.538699690402477e-06, "loss": 0.004, "step": 1650 }, { "epoch": 32.23, "learning_rate": 9.294117647058824e-06, "loss": 0.0131, "step": 1660 }, { "epoch": 32.43, "learning_rate": 9.04953560371517e-06, "loss": 0.0616, "step": 1670 }, { "epoch": 32.62, "learning_rate": 8.804953560371517e-06, "loss": 0.0058, "step": 1680 }, { "epoch": 32.82, "learning_rate": 8.560371517027865e-06, "loss": 0.0487, "step": 1690 }, { "epoch": 32.99, "eval_accuracy": 0.8695652173913043, "eval_loss": 0.7926502227783203, "eval_runtime": 0.8442, "eval_samples_per_second": 54.49, "eval_steps_per_second": 7.107, "step": 1699 }, { "epoch": 33.01, "learning_rate": 8.31578947368421e-06, "loss": 0.1104, "step": 1700 }, { "epoch": 33.2, "learning_rate": 8.071207430340556e-06, "loss": 0.0528, "step": 1710 }, { "epoch": 33.4, "learning_rate": 7.826625386996905e-06, "loss": 0.056, "step": 1720 }, { "epoch": 33.59, "learning_rate": 7.582043343653251e-06, "loss": 0.0205, "step": 1730 }, { "epoch": 33.79, "learning_rate": 7.3374613003095984e-06, "loss": 0.0431, "step": 1740 }, { "epoch": 33.98, "learning_rate": 7.092879256965944e-06, "loss": 0.0664, "step": 1750 }, { "epoch": 34.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.9607385396957397, "eval_runtime": 0.8292, "eval_samples_per_second": 55.474, "eval_steps_per_second": 7.236, "step": 1751 }, { "epoch": 34.17, "learning_rate": 6.848297213622291e-06, "loss": 0.0596, "step": 1760 }, { "epoch": 34.37, "learning_rate": 6.603715170278638e-06, "loss": 0.0592, "step": 1770 }, { "epoch": 34.56, "learning_rate": 6.3591331269349845e-06, "loss": 0.0469, "step": 1780 }, { "epoch": 34.76, "learning_rate": 6.114551083591332e-06, "loss": 0.0376, "step": 1790 }, { "epoch": 34.95, "learning_rate": 5.869969040247678e-06, "loss": 0.0054, "step": 1800 }, { "epoch": 34.99, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.9701578617095947, "eval_runtime": 0.8192, "eval_samples_per_second": 56.152, "eval_steps_per_second": 7.324, "step": 1802 }, { "epoch": 35.15, "learning_rate": 5.625386996904025e-06, "loss": 0.0512, "step": 1810 }, { "epoch": 35.34, "learning_rate": 5.3808049535603715e-06, "loss": 0.0149, "step": 1820 }, { "epoch": 35.53, "learning_rate": 5.136222910216718e-06, "loss": 0.0083, "step": 1830 }, { "epoch": 35.73, "learning_rate": 4.8916408668730645e-06, "loss": 0.0085, "step": 1840 }, { "epoch": 35.92, "learning_rate": 4.647058823529412e-06, "loss": 0.0277, "step": 1850 }, { "epoch": 36.0, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.8351179957389832, "eval_runtime": 0.8282, "eval_samples_per_second": 55.543, "eval_steps_per_second": 7.245, "step": 1854 }, { "epoch": 36.12, "learning_rate": 4.402476780185758e-06, "loss": 0.0365, "step": 1860 }, { "epoch": 36.31, "learning_rate": 4.157894736842105e-06, "loss": 0.0119, "step": 1870 }, { "epoch": 36.5, "learning_rate": 3.913312693498452e-06, "loss": 0.0608, "step": 1880 }, { "epoch": 36.7, "learning_rate": 3.6687306501547992e-06, "loss": 0.135, "step": 1890 }, { "epoch": 36.89, "learning_rate": 3.4241486068111453e-06, "loss": 0.0025, "step": 1900 }, { "epoch": 36.99, "eval_accuracy": 0.8260869565217391, "eval_loss": 0.9318453073501587, "eval_runtime": 1.1833, "eval_samples_per_second": 38.875, "eval_steps_per_second": 5.071, "step": 1905 }, { "epoch": 37.09, "learning_rate": 3.1795665634674923e-06, "loss": 0.027, "step": 1910 }, { "epoch": 37.28, "learning_rate": 2.934984520123839e-06, "loss": 0.0794, "step": 1920 }, { "epoch": 37.48, "learning_rate": 2.6904024767801857e-06, "loss": 0.0461, "step": 1930 }, { "epoch": 37.67, "learning_rate": 2.4458204334365322e-06, "loss": 0.0464, "step": 1940 }, { "epoch": 37.86, "learning_rate": 2.201238390092879e-06, "loss": 0.0188, "step": 1950 }, { "epoch": 38.0, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.899507462978363, "eval_runtime": 0.8587, "eval_samples_per_second": 53.568, "eval_steps_per_second": 6.987, "step": 1957 }, { "epoch": 38.06, "learning_rate": 1.956656346749226e-06, "loss": 0.0413, "step": 1960 }, { "epoch": 38.25, "learning_rate": 1.7120743034055727e-06, "loss": 0.0481, "step": 1970 }, { "epoch": 38.45, "learning_rate": 1.4674922600619196e-06, "loss": 0.0494, "step": 1980 }, { "epoch": 38.64, "learning_rate": 1.2229102167182661e-06, "loss": 0.0201, "step": 1990 }, { "epoch": 38.83, "learning_rate": 9.78328173374613e-07, "loss": 0.0385, "step": 2000 }, { "epoch": 38.99, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.8927894830703735, "eval_runtime": 0.8062, "eval_samples_per_second": 57.057, "eval_steps_per_second": 7.442, "step": 2008 }, { "epoch": 39.03, "learning_rate": 7.337461300309598e-07, "loss": 0.0231, "step": 2010 }, { "epoch": 39.22, "learning_rate": 4.891640866873065e-07, "loss": 0.0227, "step": 2020 }, { "epoch": 39.42, "learning_rate": 2.4458204334365327e-07, "loss": 0.1008, "step": 2030 }, { "epoch": 39.61, "learning_rate": 0.0, "loss": 0.0474, "step": 2040 }, { "epoch": 39.61, "eval_accuracy": 0.8478260869565217, "eval_loss": 0.8863323926925659, "eval_runtime": 1.0372, "eval_samples_per_second": 44.349, "eval_steps_per_second": 5.785, "step": 2040 }, { "epoch": 39.61, "step": 2040, "total_flos": 2.5142726714989363e+18, "train_loss": 0.19007185283390915, "train_runtime": 695.8956, "train_samples_per_second": 47.076, "train_steps_per_second": 2.931 } ], "logging_steps": 10, "max_steps": 2040, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.5142726714989363e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }