{ "best_metric": 0.7344809133071709, "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-1678", "epoch": 29.772151898734176, "global_step": 2940, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 1.7006802721088438e-06, "loss": 0.7145, "step": 10 }, { "epoch": 0.2, "learning_rate": 3.4013605442176877e-06, "loss": 0.6942, "step": 20 }, { "epoch": 0.3, "learning_rate": 5.102040816326531e-06, "loss": 0.6747, "step": 30 }, { "epoch": 0.41, "learning_rate": 6.802721088435375e-06, "loss": 0.6626, "step": 40 }, { "epoch": 0.51, "learning_rate": 8.503401360544217e-06, "loss": 0.645, "step": 50 }, { "epoch": 0.61, "learning_rate": 1.0204081632653061e-05, "loss": 0.643, "step": 60 }, { "epoch": 0.71, "learning_rate": 1.1904761904761905e-05, "loss": 0.6353, "step": 70 }, { "epoch": 0.81, "learning_rate": 1.360544217687075e-05, "loss": 0.6242, "step": 80 }, { "epoch": 0.91, "learning_rate": 1.5306122448979594e-05, "loss": 0.6271, "step": 90 }, { "epoch": 0.99, "eval_accuracy": 0.6925615412058509, "eval_loss": 0.6034993529319763, "eval_runtime": 291.7633, "eval_samples_per_second": 38.428, "eval_steps_per_second": 0.151, "step": 98 }, { "epoch": 1.01, "learning_rate": 1.7006802721088435e-05, "loss": 0.625, "step": 100 }, { "epoch": 1.11, "learning_rate": 1.8707482993197282e-05, "loss": 0.6206, "step": 110 }, { "epoch": 1.22, "learning_rate": 2.0408163265306123e-05, "loss": 0.6208, "step": 120 }, { "epoch": 1.32, "learning_rate": 2.2108843537414966e-05, "loss": 0.625, "step": 130 }, { "epoch": 1.42, "learning_rate": 2.380952380952381e-05, "loss": 0.6198, "step": 140 }, { "epoch": 1.52, "learning_rate": 2.5510204081632654e-05, "loss": 0.613, "step": 150 }, { "epoch": 1.62, "learning_rate": 2.72108843537415e-05, "loss": 0.6242, "step": 160 }, { "epoch": 1.72, "learning_rate": 2.891156462585034e-05, "loss": 0.616, "step": 170 }, { "epoch": 1.82, "learning_rate": 3.061224489795919e-05, "loss": 0.6145, "step": 180 }, { "epoch": 1.92, "learning_rate": 3.231292517006803e-05, "loss": 0.6156, "step": 190 }, { "epoch": 1.99, "eval_accuracy": 0.7005886550124866, "eval_loss": 0.5843892097473145, "eval_runtime": 301.4707, "eval_samples_per_second": 37.191, "eval_steps_per_second": 0.146, "step": 197 }, { "epoch": 2.03, "learning_rate": 3.401360544217687e-05, "loss": 0.6069, "step": 200 }, { "epoch": 2.13, "learning_rate": 3.571428571428572e-05, "loss": 0.6087, "step": 210 }, { "epoch": 2.23, "learning_rate": 3.7414965986394564e-05, "loss": 0.6116, "step": 220 }, { "epoch": 2.33, "learning_rate": 3.9115646258503405e-05, "loss": 0.6085, "step": 230 }, { "epoch": 2.43, "learning_rate": 4.0816326530612245e-05, "loss": 0.6049, "step": 240 }, { "epoch": 2.53, "learning_rate": 4.2517006802721085e-05, "loss": 0.6013, "step": 250 }, { "epoch": 2.63, "learning_rate": 4.421768707482993e-05, "loss": 0.6009, "step": 260 }, { "epoch": 2.73, "learning_rate": 4.591836734693878e-05, "loss": 0.608, "step": 270 }, { "epoch": 2.84, "learning_rate": 4.761904761904762e-05, "loss": 0.6145, "step": 280 }, { "epoch": 2.94, "learning_rate": 4.931972789115647e-05, "loss": 0.6148, "step": 290 }, { "epoch": 3.0, "eval_accuracy": 0.7103995718872637, "eval_loss": 0.5758489966392517, "eval_runtime": 290.2528, "eval_samples_per_second": 38.628, "eval_steps_per_second": 0.152, "step": 296 }, { "epoch": 3.04, "learning_rate": 4.9886621315192745e-05, "loss": 0.5965, "step": 300 }, { "epoch": 3.14, "learning_rate": 4.969765684051398e-05, "loss": 0.601, "step": 310 }, { "epoch": 3.24, "learning_rate": 4.9508692365835225e-05, "loss": 0.6087, "step": 320 }, { "epoch": 3.34, "learning_rate": 4.931972789115647e-05, "loss": 0.6063, "step": 330 }, { "epoch": 3.44, "learning_rate": 4.9130763416477704e-05, "loss": 0.6034, "step": 340 }, { "epoch": 3.54, "learning_rate": 4.894179894179895e-05, "loss": 0.5962, "step": 350 }, { "epoch": 3.65, "learning_rate": 4.875283446712018e-05, "loss": 0.605, "step": 360 }, { "epoch": 3.75, "learning_rate": 4.8563869992441426e-05, "loss": 0.5962, "step": 370 }, { "epoch": 3.85, "learning_rate": 4.837490551776266e-05, "loss": 0.5986, "step": 380 }, { "epoch": 3.95, "learning_rate": 4.8185941043083905e-05, "loss": 0.6055, "step": 390 }, { "epoch": 4.0, "eval_accuracy": 0.7014805565465573, "eval_loss": 0.5852587819099426, "eval_runtime": 287.2656, "eval_samples_per_second": 39.03, "eval_steps_per_second": 0.153, "step": 395 }, { "epoch": 4.05, "learning_rate": 4.799697656840514e-05, "loss": 0.597, "step": 400 }, { "epoch": 4.15, "learning_rate": 4.7808012093726384e-05, "loss": 0.5931, "step": 410 }, { "epoch": 4.25, "learning_rate": 4.761904761904762e-05, "loss": 0.5942, "step": 420 }, { "epoch": 4.35, "learning_rate": 4.7430083144368857e-05, "loss": 0.5989, "step": 430 }, { "epoch": 4.46, "learning_rate": 4.72411186696901e-05, "loss": 0.5947, "step": 440 }, { "epoch": 4.56, "learning_rate": 4.7052154195011336e-05, "loss": 0.5972, "step": 450 }, { "epoch": 4.66, "learning_rate": 4.686318972033258e-05, "loss": 0.5864, "step": 460 }, { "epoch": 4.76, "learning_rate": 4.667422524565382e-05, "loss": 0.5975, "step": 470 }, { "epoch": 4.86, "learning_rate": 4.648526077097506e-05, "loss": 0.598, "step": 480 }, { "epoch": 4.96, "learning_rate": 4.62962962962963e-05, "loss": 0.5938, "step": 490 }, { "epoch": 4.99, "eval_accuracy": 0.7103995718872637, "eval_loss": 0.5857925415039062, "eval_runtime": 270.8215, "eval_samples_per_second": 41.4, "eval_steps_per_second": 0.162, "step": 493 }, { "epoch": 5.06, "learning_rate": 4.610733182161754e-05, "loss": 0.5987, "step": 500 }, { "epoch": 5.16, "learning_rate": 4.591836734693878e-05, "loss": 0.5889, "step": 510 }, { "epoch": 5.27, "learning_rate": 4.5729402872260016e-05, "loss": 0.5908, "step": 520 }, { "epoch": 5.37, "learning_rate": 4.554043839758126e-05, "loss": 0.5908, "step": 530 }, { "epoch": 5.47, "learning_rate": 4.53514739229025e-05, "loss": 0.5871, "step": 540 }, { "epoch": 5.57, "learning_rate": 4.516250944822374e-05, "loss": 0.593, "step": 550 }, { "epoch": 5.67, "learning_rate": 4.4973544973544974e-05, "loss": 0.5891, "step": 560 }, { "epoch": 5.77, "learning_rate": 4.478458049886621e-05, "loss": 0.594, "step": 570 }, { "epoch": 5.87, "learning_rate": 4.4595616024187454e-05, "loss": 0.5924, "step": 580 }, { "epoch": 5.97, "learning_rate": 4.4406651549508697e-05, "loss": 0.5878, "step": 590 }, { "epoch": 5.99, "eval_accuracy": 0.7210132001427042, "eval_loss": 0.5630102753639221, "eval_runtime": 271.5122, "eval_samples_per_second": 41.295, "eval_steps_per_second": 0.162, "step": 592 }, { "epoch": 6.08, "learning_rate": 4.421768707482993e-05, "loss": 0.5898, "step": 600 }, { "epoch": 6.18, "learning_rate": 4.4028722600151176e-05, "loss": 0.5872, "step": 610 }, { "epoch": 6.28, "learning_rate": 4.383975812547241e-05, "loss": 0.5885, "step": 620 }, { "epoch": 6.38, "learning_rate": 4.3650793650793655e-05, "loss": 0.5811, "step": 630 }, { "epoch": 6.48, "learning_rate": 4.346182917611489e-05, "loss": 0.5859, "step": 640 }, { "epoch": 6.58, "learning_rate": 4.3272864701436134e-05, "loss": 0.5914, "step": 650 }, { "epoch": 6.68, "learning_rate": 4.308390022675737e-05, "loss": 0.5867, "step": 660 }, { "epoch": 6.78, "learning_rate": 4.289493575207861e-05, "loss": 0.5865, "step": 670 }, { "epoch": 6.89, "learning_rate": 4.2705971277399856e-05, "loss": 0.5881, "step": 680 }, { "epoch": 6.99, "learning_rate": 4.2517006802721085e-05, "loss": 0.5873, "step": 690 }, { "epoch": 7.0, "eval_accuracy": 0.7235997145915091, "eval_loss": 0.5619792938232422, "eval_runtime": 267.8238, "eval_samples_per_second": 41.863, "eval_steps_per_second": 0.164, "step": 691 }, { "epoch": 7.09, "learning_rate": 4.232804232804233e-05, "loss": 0.5803, "step": 700 }, { "epoch": 7.19, "learning_rate": 4.2139077853363565e-05, "loss": 0.5775, "step": 710 }, { "epoch": 7.29, "learning_rate": 4.195011337868481e-05, "loss": 0.5849, "step": 720 }, { "epoch": 7.39, "learning_rate": 4.176114890400605e-05, "loss": 0.5856, "step": 730 }, { "epoch": 7.49, "learning_rate": 4.157218442932729e-05, "loss": 0.5843, "step": 740 }, { "epoch": 7.59, "learning_rate": 4.138321995464853e-05, "loss": 0.5889, "step": 750 }, { "epoch": 7.7, "learning_rate": 4.1194255479969766e-05, "loss": 0.5763, "step": 760 }, { "epoch": 7.8, "learning_rate": 4.100529100529101e-05, "loss": 0.5833, "step": 770 }, { "epoch": 7.9, "learning_rate": 4.0816326530612245e-05, "loss": 0.5781, "step": 780 }, { "epoch": 8.0, "learning_rate": 4.062736205593349e-05, "loss": 0.5947, "step": 790 }, { "epoch": 8.0, "eval_accuracy": 0.7195861576881912, "eval_loss": 0.5669898390769958, "eval_runtime": 271.8958, "eval_samples_per_second": 41.236, "eval_steps_per_second": 0.162, "step": 790 }, { "epoch": 8.1, "learning_rate": 4.043839758125473e-05, "loss": 0.5784, "step": 800 }, { "epoch": 8.2, "learning_rate": 4.024943310657597e-05, "loss": 0.578, "step": 810 }, { "epoch": 8.3, "learning_rate": 4.006046863189721e-05, "loss": 0.5888, "step": 820 }, { "epoch": 8.41, "learning_rate": 3.987150415721844e-05, "loss": 0.5828, "step": 830 }, { "epoch": 8.51, "learning_rate": 3.968253968253968e-05, "loss": 0.582, "step": 840 }, { "epoch": 8.61, "learning_rate": 3.9493575207860925e-05, "loss": 0.5817, "step": 850 }, { "epoch": 8.71, "learning_rate": 3.930461073318216e-05, "loss": 0.5729, "step": 860 }, { "epoch": 8.81, "learning_rate": 3.9115646258503405e-05, "loss": 0.5831, "step": 870 }, { "epoch": 8.91, "learning_rate": 3.892668178382464e-05, "loss": 0.5866, "step": 880 }, { "epoch": 8.99, "eval_accuracy": 0.7265429896539422, "eval_loss": 0.5592203736305237, "eval_runtime": 270.4426, "eval_samples_per_second": 41.458, "eval_steps_per_second": 0.163, "step": 888 }, { "epoch": 9.01, "learning_rate": 3.8737717309145884e-05, "loss": 0.5808, "step": 890 }, { "epoch": 9.11, "learning_rate": 3.854875283446712e-05, "loss": 0.5863, "step": 900 }, { "epoch": 9.22, "learning_rate": 3.835978835978836e-05, "loss": 0.5783, "step": 910 }, { "epoch": 9.32, "learning_rate": 3.81708238851096e-05, "loss": 0.578, "step": 920 }, { "epoch": 9.42, "learning_rate": 3.798185941043084e-05, "loss": 0.569, "step": 930 }, { "epoch": 9.52, "learning_rate": 3.7792894935752085e-05, "loss": 0.5821, "step": 940 }, { "epoch": 9.62, "learning_rate": 3.760393046107332e-05, "loss": 0.5827, "step": 950 }, { "epoch": 9.72, "learning_rate": 3.7414965986394564e-05, "loss": 0.5817, "step": 960 }, { "epoch": 9.82, "learning_rate": 3.7226001511715794e-05, "loss": 0.5774, "step": 970 }, { "epoch": 9.92, "learning_rate": 3.7037037037037037e-05, "loss": 0.5807, "step": 980 }, { "epoch": 9.99, "eval_accuracy": 0.7253835176596504, "eval_loss": 0.557357132434845, "eval_runtime": 270.5413, "eval_samples_per_second": 41.443, "eval_steps_per_second": 0.163, "step": 987 }, { "epoch": 10.03, "learning_rate": 3.684807256235828e-05, "loss": 0.5883, "step": 990 }, { "epoch": 10.13, "learning_rate": 3.6659108087679516e-05, "loss": 0.5711, "step": 1000 }, { "epoch": 10.23, "learning_rate": 3.647014361300076e-05, "loss": 0.5794, "step": 1010 }, { "epoch": 10.33, "learning_rate": 3.6281179138321995e-05, "loss": 0.5869, "step": 1020 }, { "epoch": 10.43, "learning_rate": 3.609221466364324e-05, "loss": 0.584, "step": 1030 }, { "epoch": 10.53, "learning_rate": 3.5903250188964474e-05, "loss": 0.5716, "step": 1040 }, { "epoch": 10.63, "learning_rate": 3.571428571428572e-05, "loss": 0.5783, "step": 1050 }, { "epoch": 10.73, "learning_rate": 3.552532123960696e-05, "loss": 0.577, "step": 1060 }, { "epoch": 10.84, "learning_rate": 3.5336356764928196e-05, "loss": 0.5779, "step": 1070 }, { "epoch": 10.94, "learning_rate": 3.514739229024944e-05, "loss": 0.5764, "step": 1080 }, { "epoch": 11.0, "eval_accuracy": 0.7244916161255798, "eval_loss": 0.565514862537384, "eval_runtime": 273.5069, "eval_samples_per_second": 40.993, "eval_steps_per_second": 0.161, "step": 1086 }, { "epoch": 11.04, "learning_rate": 3.4958427815570675e-05, "loss": 0.5806, "step": 1090 }, { "epoch": 11.14, "learning_rate": 3.476946334089191e-05, "loss": 0.5779, "step": 1100 }, { "epoch": 11.24, "learning_rate": 3.4580498866213154e-05, "loss": 0.5799, "step": 1110 }, { "epoch": 11.34, "learning_rate": 3.439153439153439e-05, "loss": 0.5844, "step": 1120 }, { "epoch": 11.44, "learning_rate": 3.4202569916855634e-05, "loss": 0.5715, "step": 1130 }, { "epoch": 11.54, "learning_rate": 3.401360544217687e-05, "loss": 0.5809, "step": 1140 }, { "epoch": 11.65, "learning_rate": 3.382464096749811e-05, "loss": 0.5695, "step": 1150 }, { "epoch": 11.75, "learning_rate": 3.363567649281935e-05, "loss": 0.576, "step": 1160 }, { "epoch": 11.85, "learning_rate": 3.344671201814059e-05, "loss": 0.5736, "step": 1170 }, { "epoch": 11.95, "learning_rate": 3.325774754346183e-05, "loss": 0.5729, "step": 1180 }, { "epoch": 12.0, "eval_accuracy": 0.7236889047449162, "eval_loss": 0.5611264109611511, "eval_runtime": 268.1746, "eval_samples_per_second": 41.809, "eval_steps_per_second": 0.164, "step": 1185 }, { "epoch": 12.05, "learning_rate": 3.306878306878307e-05, "loss": 0.5775, "step": 1190 }, { "epoch": 12.15, "learning_rate": 3.2879818594104314e-05, "loss": 0.5774, "step": 1200 }, { "epoch": 12.25, "learning_rate": 3.269085411942555e-05, "loss": 0.5751, "step": 1210 }, { "epoch": 12.35, "learning_rate": 3.250188964474679e-05, "loss": 0.5767, "step": 1220 }, { "epoch": 12.46, "learning_rate": 3.231292517006803e-05, "loss": 0.5717, "step": 1230 }, { "epoch": 12.56, "learning_rate": 3.2123960695389265e-05, "loss": 0.5738, "step": 1240 }, { "epoch": 12.66, "learning_rate": 3.193499622071051e-05, "loss": 0.5749, "step": 1250 }, { "epoch": 12.76, "learning_rate": 3.1746031746031745e-05, "loss": 0.5741, "step": 1260 }, { "epoch": 12.86, "learning_rate": 3.155706727135299e-05, "loss": 0.5789, "step": 1270 }, { "epoch": 12.96, "learning_rate": 3.1368102796674224e-05, "loss": 0.577, "step": 1280 }, { "epoch": 12.99, "eval_accuracy": 0.7188726364609347, "eval_loss": 0.5702112913131714, "eval_runtime": 267.6574, "eval_samples_per_second": 41.889, "eval_steps_per_second": 0.164, "step": 1283 }, { "epoch": 13.06, "learning_rate": 3.117913832199547e-05, "loss": 0.5767, "step": 1290 }, { "epoch": 13.16, "learning_rate": 3.09901738473167e-05, "loss": 0.5727, "step": 1300 }, { "epoch": 13.27, "learning_rate": 3.0801209372637946e-05, "loss": 0.5702, "step": 1310 }, { "epoch": 13.37, "learning_rate": 3.061224489795919e-05, "loss": 0.5768, "step": 1320 }, { "epoch": 13.47, "learning_rate": 3.0423280423280425e-05, "loss": 0.5708, "step": 1330 }, { "epoch": 13.57, "learning_rate": 3.0234315948601665e-05, "loss": 0.5708, "step": 1340 }, { "epoch": 13.67, "learning_rate": 3.0045351473922904e-05, "loss": 0.5677, "step": 1350 }, { "epoch": 13.77, "learning_rate": 2.9856386999244147e-05, "loss": 0.5755, "step": 1360 }, { "epoch": 13.87, "learning_rate": 2.9667422524565387e-05, "loss": 0.5731, "step": 1370 }, { "epoch": 13.97, "learning_rate": 2.947845804988662e-05, "loss": 0.5702, "step": 1380 }, { "epoch": 13.99, "eval_accuracy": 0.7259186585800927, "eval_loss": 0.5587979555130005, "eval_runtime": 267.9851, "eval_samples_per_second": 41.838, "eval_steps_per_second": 0.164, "step": 1382 }, { "epoch": 14.08, "learning_rate": 2.928949357520786e-05, "loss": 0.5713, "step": 1390 }, { "epoch": 14.18, "learning_rate": 2.91005291005291e-05, "loss": 0.5648, "step": 1400 }, { "epoch": 14.28, "learning_rate": 2.891156462585034e-05, "loss": 0.5715, "step": 1410 }, { "epoch": 14.38, "learning_rate": 2.872260015117158e-05, "loss": 0.5711, "step": 1420 }, { "epoch": 14.48, "learning_rate": 2.853363567649282e-05, "loss": 0.5598, "step": 1430 }, { "epoch": 14.58, "learning_rate": 2.834467120181406e-05, "loss": 0.5746, "step": 1440 }, { "epoch": 14.68, "learning_rate": 2.81557067271353e-05, "loss": 0.5702, "step": 1450 }, { "epoch": 14.78, "learning_rate": 2.796674225245654e-05, "loss": 0.58, "step": 1460 }, { "epoch": 14.89, "learning_rate": 2.777777777777778e-05, "loss": 0.5706, "step": 1470 }, { "epoch": 14.99, "learning_rate": 2.758881330309902e-05, "loss": 0.5717, "step": 1480 }, { "epoch": 15.0, "eval_accuracy": 0.7244024259721726, "eval_loss": 0.5564510226249695, "eval_runtime": 273.2108, "eval_samples_per_second": 41.038, "eval_steps_per_second": 0.161, "step": 1481 }, { "epoch": 15.09, "learning_rate": 2.739984882842026e-05, "loss": 0.5678, "step": 1490 }, { "epoch": 15.19, "learning_rate": 2.72108843537415e-05, "loss": 0.5653, "step": 1500 }, { "epoch": 15.29, "learning_rate": 2.7021919879062734e-05, "loss": 0.5599, "step": 1510 }, { "epoch": 15.39, "learning_rate": 2.6832955404383974e-05, "loss": 0.5745, "step": 1520 }, { "epoch": 15.49, "learning_rate": 2.6643990929705213e-05, "loss": 0.5701, "step": 1530 }, { "epoch": 15.59, "learning_rate": 2.6455026455026456e-05, "loss": 0.5715, "step": 1540 }, { "epoch": 15.7, "learning_rate": 2.6266061980347696e-05, "loss": 0.5677, "step": 1550 }, { "epoch": 15.8, "learning_rate": 2.6077097505668935e-05, "loss": 0.5749, "step": 1560 }, { "epoch": 15.9, "learning_rate": 2.5888133030990175e-05, "loss": 0.5741, "step": 1570 }, { "epoch": 16.0, "learning_rate": 2.5699168556311414e-05, "loss": 0.5646, "step": 1580 }, { "epoch": 16.0, "eval_accuracy": 0.7302889760970389, "eval_loss": 0.5536319017410278, "eval_runtime": 270.439, "eval_samples_per_second": 41.459, "eval_steps_per_second": 0.163, "step": 1580 }, { "epoch": 16.1, "learning_rate": 2.5510204081632654e-05, "loss": 0.5717, "step": 1590 }, { "epoch": 16.2, "learning_rate": 2.5321239606953894e-05, "loss": 0.565, "step": 1600 }, { "epoch": 16.3, "learning_rate": 2.5132275132275137e-05, "loss": 0.5749, "step": 1610 }, { "epoch": 16.41, "learning_rate": 2.4943310657596373e-05, "loss": 0.5701, "step": 1620 }, { "epoch": 16.51, "learning_rate": 2.4754346182917612e-05, "loss": 0.5717, "step": 1630 }, { "epoch": 16.61, "learning_rate": 2.4565381708238852e-05, "loss": 0.5641, "step": 1640 }, { "epoch": 16.71, "learning_rate": 2.437641723356009e-05, "loss": 0.5663, "step": 1650 }, { "epoch": 16.81, "learning_rate": 2.418745275888133e-05, "loss": 0.5674, "step": 1660 }, { "epoch": 16.91, "learning_rate": 2.399848828420257e-05, "loss": 0.5591, "step": 1670 }, { "epoch": 16.99, "eval_accuracy": 0.7344809133071709, "eval_loss": 0.5525398254394531, "eval_runtime": 269.9721, "eval_samples_per_second": 41.53, "eval_steps_per_second": 0.163, "step": 1678 }, { "epoch": 17.01, "learning_rate": 2.380952380952381e-05, "loss": 0.5754, "step": 1680 }, { "epoch": 17.11, "learning_rate": 2.362055933484505e-05, "loss": 0.5734, "step": 1690 }, { "epoch": 17.22, "learning_rate": 2.343159486016629e-05, "loss": 0.5671, "step": 1700 }, { "epoch": 17.32, "learning_rate": 2.324263038548753e-05, "loss": 0.5663, "step": 1710 }, { "epoch": 17.42, "learning_rate": 2.305366591080877e-05, "loss": 0.5707, "step": 1720 }, { "epoch": 17.52, "learning_rate": 2.2864701436130008e-05, "loss": 0.5635, "step": 1730 }, { "epoch": 17.62, "learning_rate": 2.267573696145125e-05, "loss": 0.569, "step": 1740 }, { "epoch": 17.72, "learning_rate": 2.2486772486772487e-05, "loss": 0.5675, "step": 1750 }, { "epoch": 17.82, "learning_rate": 2.2297808012093727e-05, "loss": 0.5623, "step": 1760 }, { "epoch": 17.92, "learning_rate": 2.2108843537414966e-05, "loss": 0.5586, "step": 1770 }, { "epoch": 17.99, "eval_accuracy": 0.7285943631823046, "eval_loss": 0.5565158724784851, "eval_runtime": 267.0513, "eval_samples_per_second": 41.984, "eval_steps_per_second": 0.165, "step": 1777 }, { "epoch": 18.03, "learning_rate": 2.1919879062736206e-05, "loss": 0.5606, "step": 1780 }, { "epoch": 18.13, "learning_rate": 2.1730914588057446e-05, "loss": 0.5611, "step": 1790 }, { "epoch": 18.23, "learning_rate": 2.1541950113378685e-05, "loss": 0.5693, "step": 1800 }, { "epoch": 18.33, "learning_rate": 2.1352985638699928e-05, "loss": 0.559, "step": 1810 }, { "epoch": 18.43, "learning_rate": 2.1164021164021164e-05, "loss": 0.5673, "step": 1820 }, { "epoch": 18.53, "learning_rate": 2.0975056689342404e-05, "loss": 0.5635, "step": 1830 }, { "epoch": 18.63, "learning_rate": 2.0786092214663643e-05, "loss": 0.567, "step": 1840 }, { "epoch": 18.73, "learning_rate": 2.0597127739984883e-05, "loss": 0.5637, "step": 1850 }, { "epoch": 18.84, "learning_rate": 2.0408163265306123e-05, "loss": 0.5648, "step": 1860 }, { "epoch": 18.94, "learning_rate": 2.0219198790627365e-05, "loss": 0.5668, "step": 1870 }, { "epoch": 19.0, "eval_accuracy": 0.7303781662504459, "eval_loss": 0.5519587993621826, "eval_runtime": 272.9351, "eval_samples_per_second": 41.079, "eval_steps_per_second": 0.161, "step": 1876 }, { "epoch": 19.04, "learning_rate": 2.0030234315948605e-05, "loss": 0.5628, "step": 1880 }, { "epoch": 19.14, "learning_rate": 1.984126984126984e-05, "loss": 0.5642, "step": 1890 }, { "epoch": 19.24, "learning_rate": 1.965230536659108e-05, "loss": 0.5563, "step": 1900 }, { "epoch": 19.34, "learning_rate": 1.946334089191232e-05, "loss": 0.5642, "step": 1910 }, { "epoch": 19.44, "learning_rate": 1.927437641723356e-05, "loss": 0.5618, "step": 1920 }, { "epoch": 19.54, "learning_rate": 1.90854119425548e-05, "loss": 0.5578, "step": 1930 }, { "epoch": 19.65, "learning_rate": 1.8896447467876043e-05, "loss": 0.5636, "step": 1940 }, { "epoch": 19.75, "learning_rate": 1.8707482993197282e-05, "loss": 0.5749, "step": 1950 }, { "epoch": 19.85, "learning_rate": 1.8518518518518518e-05, "loss": 0.5628, "step": 1960 }, { "epoch": 19.95, "learning_rate": 1.8329554043839758e-05, "loss": 0.5617, "step": 1970 }, { "epoch": 20.0, "eval_accuracy": 0.7288619336425258, "eval_loss": 0.5557389855384827, "eval_runtime": 270.9737, "eval_samples_per_second": 41.377, "eval_steps_per_second": 0.162, "step": 1975 }, { "epoch": 20.05, "learning_rate": 1.8140589569160997e-05, "loss": 0.5541, "step": 1980 }, { "epoch": 20.15, "learning_rate": 1.7951625094482237e-05, "loss": 0.5693, "step": 1990 }, { "epoch": 20.25, "learning_rate": 1.776266061980348e-05, "loss": 0.5652, "step": 2000 }, { "epoch": 20.35, "learning_rate": 1.757369614512472e-05, "loss": 0.5579, "step": 2010 }, { "epoch": 20.46, "learning_rate": 1.7384731670445956e-05, "loss": 0.5597, "step": 2020 }, { "epoch": 20.56, "learning_rate": 1.7195767195767195e-05, "loss": 0.5674, "step": 2030 }, { "epoch": 20.66, "learning_rate": 1.7006802721088435e-05, "loss": 0.5634, "step": 2040 }, { "epoch": 20.76, "learning_rate": 1.6817838246409674e-05, "loss": 0.567, "step": 2050 }, { "epoch": 20.86, "learning_rate": 1.6628873771730914e-05, "loss": 0.5571, "step": 2060 }, { "epoch": 20.96, "learning_rate": 1.6439909297052157e-05, "loss": 0.5546, "step": 2070 }, { "epoch": 20.99, "eval_accuracy": 0.7325187299322155, "eval_loss": 0.5561436414718628, "eval_runtime": 269.5801, "eval_samples_per_second": 41.591, "eval_steps_per_second": 0.163, "step": 2073 }, { "epoch": 21.06, "learning_rate": 1.6250944822373397e-05, "loss": 0.5589, "step": 2080 }, { "epoch": 21.16, "learning_rate": 1.6061980347694633e-05, "loss": 0.5658, "step": 2090 }, { "epoch": 21.27, "learning_rate": 1.5873015873015872e-05, "loss": 0.5596, "step": 2100 }, { "epoch": 21.37, "learning_rate": 1.5684051398337112e-05, "loss": 0.5618, "step": 2110 }, { "epoch": 21.47, "learning_rate": 1.549508692365835e-05, "loss": 0.5624, "step": 2120 }, { "epoch": 21.57, "learning_rate": 1.5306122448979594e-05, "loss": 0.5619, "step": 2130 }, { "epoch": 21.67, "learning_rate": 1.5117157974300832e-05, "loss": 0.5554, "step": 2140 }, { "epoch": 21.77, "learning_rate": 1.4928193499622074e-05, "loss": 0.5539, "step": 2150 }, { "epoch": 21.87, "learning_rate": 1.473922902494331e-05, "loss": 0.5657, "step": 2160 }, { "epoch": 21.97, "learning_rate": 1.455026455026455e-05, "loss": 0.5579, "step": 2170 }, { "epoch": 21.99, "eval_accuracy": 0.7314484480913307, "eval_loss": 0.5537222623825073, "eval_runtime": 267.4411, "eval_samples_per_second": 41.923, "eval_steps_per_second": 0.165, "step": 2172 }, { "epoch": 22.08, "learning_rate": 1.436130007558579e-05, "loss": 0.554, "step": 2180 }, { "epoch": 22.18, "learning_rate": 1.417233560090703e-05, "loss": 0.569, "step": 2190 }, { "epoch": 22.28, "learning_rate": 1.398337112622827e-05, "loss": 0.5528, "step": 2200 }, { "epoch": 22.38, "learning_rate": 1.379440665154951e-05, "loss": 0.5649, "step": 2210 }, { "epoch": 22.48, "learning_rate": 1.360544217687075e-05, "loss": 0.5586, "step": 2220 }, { "epoch": 22.58, "learning_rate": 1.3416477702191987e-05, "loss": 0.5606, "step": 2230 }, { "epoch": 22.68, "learning_rate": 1.3227513227513228e-05, "loss": 0.5551, "step": 2240 }, { "epoch": 22.78, "learning_rate": 1.3038548752834468e-05, "loss": 0.5588, "step": 2250 }, { "epoch": 22.89, "learning_rate": 1.2849584278155707e-05, "loss": 0.5591, "step": 2260 }, { "epoch": 22.99, "learning_rate": 1.2660619803476947e-05, "loss": 0.5604, "step": 2270 }, { "epoch": 23.0, "eval_accuracy": 0.72904031394934, "eval_loss": 0.5545207262039185, "eval_runtime": 270.2678, "eval_samples_per_second": 41.485, "eval_steps_per_second": 0.163, "step": 2271 }, { "epoch": 23.09, "learning_rate": 1.2471655328798186e-05, "loss": 0.5588, "step": 2280 }, { "epoch": 23.19, "learning_rate": 1.2282690854119426e-05, "loss": 0.5499, "step": 2290 }, { "epoch": 23.29, "learning_rate": 1.2093726379440666e-05, "loss": 0.5495, "step": 2300 }, { "epoch": 23.39, "learning_rate": 1.1904761904761905e-05, "loss": 0.5571, "step": 2310 }, { "epoch": 23.49, "learning_rate": 1.1715797430083145e-05, "loss": 0.5545, "step": 2320 }, { "epoch": 23.59, "learning_rate": 1.1526832955404384e-05, "loss": 0.5625, "step": 2330 }, { "epoch": 23.7, "learning_rate": 1.1337868480725626e-05, "loss": 0.5573, "step": 2340 }, { "epoch": 23.8, "learning_rate": 1.1148904006046863e-05, "loss": 0.5623, "step": 2350 }, { "epoch": 23.9, "learning_rate": 1.0959939531368103e-05, "loss": 0.5527, "step": 2360 }, { "epoch": 24.0, "learning_rate": 1.0770975056689343e-05, "loss": 0.5563, "step": 2370 }, { "epoch": 24.0, "eval_accuracy": 0.7287727434891188, "eval_loss": 0.5590741634368896, "eval_runtime": 269.4039, "eval_samples_per_second": 41.618, "eval_steps_per_second": 0.163, "step": 2370 }, { "epoch": 24.1, "learning_rate": 1.0582010582010582e-05, "loss": 0.5586, "step": 2380 }, { "epoch": 24.2, "learning_rate": 1.0393046107331822e-05, "loss": 0.5424, "step": 2390 }, { "epoch": 24.3, "learning_rate": 1.0204081632653061e-05, "loss": 0.5598, "step": 2400 }, { "epoch": 24.41, "learning_rate": 1.0015117157974303e-05, "loss": 0.5528, "step": 2410 }, { "epoch": 24.51, "learning_rate": 9.82615268329554e-06, "loss": 0.5521, "step": 2420 }, { "epoch": 24.61, "learning_rate": 9.63718820861678e-06, "loss": 0.5524, "step": 2430 }, { "epoch": 24.71, "learning_rate": 9.448223733938021e-06, "loss": 0.5602, "step": 2440 }, { "epoch": 24.81, "learning_rate": 9.259259259259259e-06, "loss": 0.5608, "step": 2450 }, { "epoch": 24.91, "learning_rate": 9.070294784580499e-06, "loss": 0.5634, "step": 2460 }, { "epoch": 24.99, "eval_accuracy": 0.7307349268640742, "eval_loss": 0.5545657277107239, "eval_runtime": 273.5576, "eval_samples_per_second": 40.986, "eval_steps_per_second": 0.161, "step": 2468 }, { "epoch": 25.01, "learning_rate": 8.88133030990174e-06, "loss": 0.5516, "step": 2470 }, { "epoch": 25.11, "learning_rate": 8.692365835222978e-06, "loss": 0.5489, "step": 2480 }, { "epoch": 25.22, "learning_rate": 8.503401360544217e-06, "loss": 0.5562, "step": 2490 }, { "epoch": 25.32, "learning_rate": 8.314436885865457e-06, "loss": 0.5498, "step": 2500 }, { "epoch": 25.42, "learning_rate": 8.125472411186698e-06, "loss": 0.5578, "step": 2510 }, { "epoch": 25.52, "learning_rate": 7.936507936507936e-06, "loss": 0.551, "step": 2520 }, { "epoch": 25.62, "learning_rate": 7.747543461829176e-06, "loss": 0.5572, "step": 2530 }, { "epoch": 25.72, "learning_rate": 7.558578987150416e-06, "loss": 0.5523, "step": 2540 }, { "epoch": 25.82, "learning_rate": 7.369614512471655e-06, "loss": 0.5525, "step": 2550 }, { "epoch": 25.92, "learning_rate": 7.180650037792895e-06, "loss": 0.5563, "step": 2560 }, { "epoch": 25.99, "eval_accuracy": 0.7302889760970389, "eval_loss": 0.5556601285934448, "eval_runtime": 270.2761, "eval_samples_per_second": 41.484, "eval_steps_per_second": 0.163, "step": 2567 }, { "epoch": 26.03, "learning_rate": 6.991685563114135e-06, "loss": 0.5607, "step": 2570 }, { "epoch": 26.13, "learning_rate": 6.802721088435375e-06, "loss": 0.5555, "step": 2580 }, { "epoch": 26.23, "learning_rate": 6.613756613756614e-06, "loss": 0.5527, "step": 2590 }, { "epoch": 26.33, "learning_rate": 6.424792139077854e-06, "loss": 0.5549, "step": 2600 }, { "epoch": 26.43, "learning_rate": 6.235827664399093e-06, "loss": 0.5435, "step": 2610 }, { "epoch": 26.53, "learning_rate": 6.046863189720333e-06, "loss": 0.5511, "step": 2620 }, { "epoch": 26.63, "learning_rate": 5.857898715041572e-06, "loss": 0.5629, "step": 2630 }, { "epoch": 26.73, "learning_rate": 5.668934240362813e-06, "loss": 0.5527, "step": 2640 }, { "epoch": 26.84, "learning_rate": 5.4799697656840515e-06, "loss": 0.5541, "step": 2650 }, { "epoch": 26.94, "learning_rate": 5.291005291005291e-06, "loss": 0.5563, "step": 2660 }, { "epoch": 27.0, "eval_accuracy": 0.727613271494827, "eval_loss": 0.5571199655532837, "eval_runtime": 269.8955, "eval_samples_per_second": 41.542, "eval_steps_per_second": 0.163, "step": 2666 }, { "epoch": 27.04, "learning_rate": 5.102040816326531e-06, "loss": 0.553, "step": 2670 }, { "epoch": 27.14, "learning_rate": 4.91307634164777e-06, "loss": 0.5502, "step": 2680 }, { "epoch": 27.24, "learning_rate": 4.724111866969011e-06, "loss": 0.55, "step": 2690 }, { "epoch": 27.34, "learning_rate": 4.535147392290249e-06, "loss": 0.5527, "step": 2700 }, { "epoch": 27.44, "learning_rate": 4.346182917611489e-06, "loss": 0.5462, "step": 2710 }, { "epoch": 27.54, "learning_rate": 4.1572184429327285e-06, "loss": 0.5502, "step": 2720 }, { "epoch": 27.65, "learning_rate": 3.968253968253968e-06, "loss": 0.5533, "step": 2730 }, { "epoch": 27.75, "learning_rate": 3.779289493575208e-06, "loss": 0.5585, "step": 2740 }, { "epoch": 27.85, "learning_rate": 3.5903250188964477e-06, "loss": 0.556, "step": 2750 }, { "epoch": 27.95, "learning_rate": 3.4013605442176877e-06, "loss": 0.5544, "step": 2760 }, { "epoch": 28.0, "eval_accuracy": 0.7298430253300036, "eval_loss": 0.5550753474235535, "eval_runtime": 273.4563, "eval_samples_per_second": 41.001, "eval_steps_per_second": 0.161, "step": 2765 }, { "epoch": 28.05, "learning_rate": 3.212396069538927e-06, "loss": 0.553, "step": 2770 }, { "epoch": 28.15, "learning_rate": 3.0234315948601664e-06, "loss": 0.5534, "step": 2780 }, { "epoch": 28.25, "learning_rate": 2.8344671201814064e-06, "loss": 0.5488, "step": 2790 }, { "epoch": 28.35, "learning_rate": 2.6455026455026455e-06, "loss": 0.5553, "step": 2800 }, { "epoch": 28.46, "learning_rate": 2.456538170823885e-06, "loss": 0.5503, "step": 2810 }, { "epoch": 28.56, "learning_rate": 2.2675736961451247e-06, "loss": 0.5493, "step": 2820 }, { "epoch": 28.66, "learning_rate": 2.0786092214663643e-06, "loss": 0.5474, "step": 2830 }, { "epoch": 28.76, "learning_rate": 1.889644746787604e-06, "loss": 0.5477, "step": 2840 }, { "epoch": 28.86, "learning_rate": 1.7006802721088438e-06, "loss": 0.5565, "step": 2850 }, { "epoch": 28.96, "learning_rate": 1.5117157974300832e-06, "loss": 0.5491, "step": 2860 }, { "epoch": 28.99, "eval_accuracy": 0.7282376025686764, "eval_loss": 0.5596103668212891, "eval_runtime": 270.7488, "eval_samples_per_second": 41.411, "eval_steps_per_second": 0.163, "step": 2863 }, { "epoch": 29.06, "learning_rate": 1.3227513227513228e-06, "loss": 0.5508, "step": 2870 }, { "epoch": 29.16, "learning_rate": 1.1337868480725623e-06, "loss": 0.5513, "step": 2880 }, { "epoch": 29.27, "learning_rate": 9.44822373393802e-07, "loss": 0.5517, "step": 2890 }, { "epoch": 29.37, "learning_rate": 7.558578987150416e-07, "loss": 0.5575, "step": 2900 }, { "epoch": 29.47, "learning_rate": 5.668934240362812e-07, "loss": 0.5519, "step": 2910 }, { "epoch": 29.57, "learning_rate": 3.779289493575208e-07, "loss": 0.5549, "step": 2920 }, { "epoch": 29.67, "learning_rate": 1.889644746787604e-07, "loss": 0.5478, "step": 2930 }, { "epoch": 29.77, "learning_rate": 0.0, "loss": 0.5461, "step": 2940 }, { "epoch": 29.77, "eval_accuracy": 0.7302889760970389, "eval_loss": 0.5574254989624023, "eval_runtime": 267.0764, "eval_samples_per_second": 41.98, "eval_steps_per_second": 0.165, "step": 2940 }, { "epoch": 29.77, "step": 2940, "total_flos": 7.46776315809736e+19, "train_loss": 0.5755378539870385, "train_runtime": 78104.0618, "train_samples_per_second": 38.759, "train_steps_per_second": 0.038 } ], "max_steps": 2940, "num_train_epochs": 30, "total_flos": 7.46776315809736e+19, "trial_name": null, "trial_params": null }