|
{ |
|
"best_metric": 0.7344809133071709, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat/checkpoint-1678", |
|
"epoch": 29.772151898734176, |
|
"global_step": 2940, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7006802721088438e-06, |
|
"loss": 0.7145, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.4013605442176877e-06, |
|
"loss": 0.6942, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5.102040816326531e-06, |
|
"loss": 0.6747, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 6.802721088435375e-06, |
|
"loss": 0.6626, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 8.503401360544217e-06, |
|
"loss": 0.645, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.0204081632653061e-05, |
|
"loss": 0.643, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.6353, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.360544217687075e-05, |
|
"loss": 0.6242, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.5306122448979594e-05, |
|
"loss": 0.6271, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.6925615412058509, |
|
"eval_loss": 0.6034993529319763, |
|
"eval_runtime": 291.7633, |
|
"eval_samples_per_second": 38.428, |
|
"eval_steps_per_second": 0.151, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 1.7006802721088435e-05, |
|
"loss": 0.625, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 1.8707482993197282e-05, |
|
"loss": 0.6206, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.0408163265306123e-05, |
|
"loss": 0.6208, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.2108843537414966e-05, |
|
"loss": 0.625, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.6198, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.5510204081632654e-05, |
|
"loss": 0.613, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.72108843537415e-05, |
|
"loss": 0.6242, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.891156462585034e-05, |
|
"loss": 0.616, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.061224489795919e-05, |
|
"loss": 0.6145, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.231292517006803e-05, |
|
"loss": 0.6156, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.7005886550124866, |
|
"eval_loss": 0.5843892097473145, |
|
"eval_runtime": 301.4707, |
|
"eval_samples_per_second": 37.191, |
|
"eval_steps_per_second": 0.146, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 3.401360544217687e-05, |
|
"loss": 0.6069, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.6087, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 3.7414965986394564e-05, |
|
"loss": 0.6116, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.9115646258503405e-05, |
|
"loss": 0.6085, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 4.0816326530612245e-05, |
|
"loss": 0.6049, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 4.2517006802721085e-05, |
|
"loss": 0.6013, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 4.421768707482993e-05, |
|
"loss": 0.6009, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.591836734693878e-05, |
|
"loss": 0.608, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 0.6145, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 4.931972789115647e-05, |
|
"loss": 0.6148, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7103995718872637, |
|
"eval_loss": 0.5758489966392517, |
|
"eval_runtime": 290.2528, |
|
"eval_samples_per_second": 38.628, |
|
"eval_steps_per_second": 0.152, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 4.9886621315192745e-05, |
|
"loss": 0.5965, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 4.969765684051398e-05, |
|
"loss": 0.601, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 4.9508692365835225e-05, |
|
"loss": 0.6087, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 4.931972789115647e-05, |
|
"loss": 0.6063, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 4.9130763416477704e-05, |
|
"loss": 0.6034, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 4.894179894179895e-05, |
|
"loss": 0.5962, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 4.875283446712018e-05, |
|
"loss": 0.605, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 4.8563869992441426e-05, |
|
"loss": 0.5962, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 4.837490551776266e-05, |
|
"loss": 0.5986, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 4.8185941043083905e-05, |
|
"loss": 0.6055, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7014805565465573, |
|
"eval_loss": 0.5852587819099426, |
|
"eval_runtime": 287.2656, |
|
"eval_samples_per_second": 39.03, |
|
"eval_steps_per_second": 0.153, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 4.799697656840514e-05, |
|
"loss": 0.597, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 4.7808012093726384e-05, |
|
"loss": 0.5931, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 0.5942, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 4.7430083144368857e-05, |
|
"loss": 0.5989, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 4.72411186696901e-05, |
|
"loss": 0.5947, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 4.7052154195011336e-05, |
|
"loss": 0.5972, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 4.686318972033258e-05, |
|
"loss": 0.5864, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 4.667422524565382e-05, |
|
"loss": 0.5975, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 4.648526077097506e-05, |
|
"loss": 0.598, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.5938, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.7103995718872637, |
|
"eval_loss": 0.5857925415039062, |
|
"eval_runtime": 270.8215, |
|
"eval_samples_per_second": 41.4, |
|
"eval_steps_per_second": 0.162, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 4.610733182161754e-05, |
|
"loss": 0.5987, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 4.591836734693878e-05, |
|
"loss": 0.5889, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 4.5729402872260016e-05, |
|
"loss": 0.5908, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 4.554043839758126e-05, |
|
"loss": 0.5908, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 4.53514739229025e-05, |
|
"loss": 0.5871, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.57, |
|
"learning_rate": 4.516250944822374e-05, |
|
"loss": 0.593, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 4.4973544973544974e-05, |
|
"loss": 0.5891, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 4.478458049886621e-05, |
|
"loss": 0.594, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 4.4595616024187454e-05, |
|
"loss": 0.5924, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 4.4406651549508697e-05, |
|
"loss": 0.5878, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.7210132001427042, |
|
"eval_loss": 0.5630102753639221, |
|
"eval_runtime": 271.5122, |
|
"eval_samples_per_second": 41.295, |
|
"eval_steps_per_second": 0.162, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 4.421768707482993e-05, |
|
"loss": 0.5898, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 4.4028722600151176e-05, |
|
"loss": 0.5872, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 4.383975812547241e-05, |
|
"loss": 0.5885, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 4.3650793650793655e-05, |
|
"loss": 0.5811, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 4.346182917611489e-05, |
|
"loss": 0.5859, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 4.3272864701436134e-05, |
|
"loss": 0.5914, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 6.68, |
|
"learning_rate": 4.308390022675737e-05, |
|
"loss": 0.5867, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 4.289493575207861e-05, |
|
"loss": 0.5865, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 4.2705971277399856e-05, |
|
"loss": 0.5881, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 4.2517006802721085e-05, |
|
"loss": 0.5873, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7235997145915091, |
|
"eval_loss": 0.5619792938232422, |
|
"eval_runtime": 267.8238, |
|
"eval_samples_per_second": 41.863, |
|
"eval_steps_per_second": 0.164, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 4.232804232804233e-05, |
|
"loss": 0.5803, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 4.2139077853363565e-05, |
|
"loss": 0.5775, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 4.195011337868481e-05, |
|
"loss": 0.5849, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 4.176114890400605e-05, |
|
"loss": 0.5856, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 4.157218442932729e-05, |
|
"loss": 0.5843, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 4.138321995464853e-05, |
|
"loss": 0.5889, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 4.1194255479969766e-05, |
|
"loss": 0.5763, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 4.100529100529101e-05, |
|
"loss": 0.5833, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 4.0816326530612245e-05, |
|
"loss": 0.5781, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.062736205593349e-05, |
|
"loss": 0.5947, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7195861576881912, |
|
"eval_loss": 0.5669898390769958, |
|
"eval_runtime": 271.8958, |
|
"eval_samples_per_second": 41.236, |
|
"eval_steps_per_second": 0.162, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 4.043839758125473e-05, |
|
"loss": 0.5784, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 4.024943310657597e-05, |
|
"loss": 0.578, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 4.006046863189721e-05, |
|
"loss": 0.5888, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"learning_rate": 3.987150415721844e-05, |
|
"loss": 0.5828, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 0.582, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 3.9493575207860925e-05, |
|
"loss": 0.5817, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 3.930461073318216e-05, |
|
"loss": 0.5729, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 3.9115646258503405e-05, |
|
"loss": 0.5831, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 3.892668178382464e-05, |
|
"loss": 0.5866, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.7265429896539422, |
|
"eval_loss": 0.5592203736305237, |
|
"eval_runtime": 270.4426, |
|
"eval_samples_per_second": 41.458, |
|
"eval_steps_per_second": 0.163, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 3.8737717309145884e-05, |
|
"loss": 0.5808, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 3.854875283446712e-05, |
|
"loss": 0.5863, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 3.835978835978836e-05, |
|
"loss": 0.5783, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 3.81708238851096e-05, |
|
"loss": 0.578, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 3.798185941043084e-05, |
|
"loss": 0.569, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 3.7792894935752085e-05, |
|
"loss": 0.5821, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 3.760393046107332e-05, |
|
"loss": 0.5827, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 3.7414965986394564e-05, |
|
"loss": 0.5817, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 3.7226001511715794e-05, |
|
"loss": 0.5774, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.5807, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.7253835176596504, |
|
"eval_loss": 0.557357132434845, |
|
"eval_runtime": 270.5413, |
|
"eval_samples_per_second": 41.443, |
|
"eval_steps_per_second": 0.163, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 10.03, |
|
"learning_rate": 3.684807256235828e-05, |
|
"loss": 0.5883, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 10.13, |
|
"learning_rate": 3.6659108087679516e-05, |
|
"loss": 0.5711, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.23, |
|
"learning_rate": 3.647014361300076e-05, |
|
"loss": 0.5794, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 10.33, |
|
"learning_rate": 3.6281179138321995e-05, |
|
"loss": 0.5869, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 10.43, |
|
"learning_rate": 3.609221466364324e-05, |
|
"loss": 0.584, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 3.5903250188964474e-05, |
|
"loss": 0.5716, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 0.5783, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 3.552532123960696e-05, |
|
"loss": 0.577, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.84, |
|
"learning_rate": 3.5336356764928196e-05, |
|
"loss": 0.5779, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 3.514739229024944e-05, |
|
"loss": 0.5764, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7244916161255798, |
|
"eval_loss": 0.565514862537384, |
|
"eval_runtime": 273.5069, |
|
"eval_samples_per_second": 40.993, |
|
"eval_steps_per_second": 0.161, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 3.4958427815570675e-05, |
|
"loss": 0.5806, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 11.14, |
|
"learning_rate": 3.476946334089191e-05, |
|
"loss": 0.5779, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 3.4580498866213154e-05, |
|
"loss": 0.5799, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"learning_rate": 3.439153439153439e-05, |
|
"loss": 0.5844, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 3.4202569916855634e-05, |
|
"loss": 0.5715, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 3.401360544217687e-05, |
|
"loss": 0.5809, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 3.382464096749811e-05, |
|
"loss": 0.5695, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 11.75, |
|
"learning_rate": 3.363567649281935e-05, |
|
"loss": 0.576, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 11.85, |
|
"learning_rate": 3.344671201814059e-05, |
|
"loss": 0.5736, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 11.95, |
|
"learning_rate": 3.325774754346183e-05, |
|
"loss": 0.5729, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7236889047449162, |
|
"eval_loss": 0.5611264109611511, |
|
"eval_runtime": 268.1746, |
|
"eval_samples_per_second": 41.809, |
|
"eval_steps_per_second": 0.164, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 12.05, |
|
"learning_rate": 3.306878306878307e-05, |
|
"loss": 0.5775, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 12.15, |
|
"learning_rate": 3.2879818594104314e-05, |
|
"loss": 0.5774, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 3.269085411942555e-05, |
|
"loss": 0.5751, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 12.35, |
|
"learning_rate": 3.250188964474679e-05, |
|
"loss": 0.5767, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 12.46, |
|
"learning_rate": 3.231292517006803e-05, |
|
"loss": 0.5717, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 12.56, |
|
"learning_rate": 3.2123960695389265e-05, |
|
"loss": 0.5738, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 12.66, |
|
"learning_rate": 3.193499622071051e-05, |
|
"loss": 0.5749, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 12.76, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 0.5741, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 3.155706727135299e-05, |
|
"loss": 0.5789, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 12.96, |
|
"learning_rate": 3.1368102796674224e-05, |
|
"loss": 0.577, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.7188726364609347, |
|
"eval_loss": 0.5702112913131714, |
|
"eval_runtime": 267.6574, |
|
"eval_samples_per_second": 41.889, |
|
"eval_steps_per_second": 0.164, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 3.117913832199547e-05, |
|
"loss": 0.5767, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 3.09901738473167e-05, |
|
"loss": 0.5727, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 13.27, |
|
"learning_rate": 3.0801209372637946e-05, |
|
"loss": 0.5702, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 13.37, |
|
"learning_rate": 3.061224489795919e-05, |
|
"loss": 0.5768, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 3.0423280423280425e-05, |
|
"loss": 0.5708, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 13.57, |
|
"learning_rate": 3.0234315948601665e-05, |
|
"loss": 0.5708, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 13.67, |
|
"learning_rate": 3.0045351473922904e-05, |
|
"loss": 0.5677, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 2.9856386999244147e-05, |
|
"loss": 0.5755, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 13.87, |
|
"learning_rate": 2.9667422524565387e-05, |
|
"loss": 0.5731, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 2.947845804988662e-05, |
|
"loss": 0.5702, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_accuracy": 0.7259186585800927, |
|
"eval_loss": 0.5587979555130005, |
|
"eval_runtime": 267.9851, |
|
"eval_samples_per_second": 41.838, |
|
"eval_steps_per_second": 0.164, |
|
"step": 1382 |
|
}, |
|
{ |
|
"epoch": 14.08, |
|
"learning_rate": 2.928949357520786e-05, |
|
"loss": 0.5713, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 14.18, |
|
"learning_rate": 2.91005291005291e-05, |
|
"loss": 0.5648, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 14.28, |
|
"learning_rate": 2.891156462585034e-05, |
|
"loss": 0.5715, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 2.872260015117158e-05, |
|
"loss": 0.5711, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 14.48, |
|
"learning_rate": 2.853363567649282e-05, |
|
"loss": 0.5598, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 2.834467120181406e-05, |
|
"loss": 0.5746, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 14.68, |
|
"learning_rate": 2.81557067271353e-05, |
|
"loss": 0.5702, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 2.796674225245654e-05, |
|
"loss": 0.58, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 14.89, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.5706, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 2.758881330309902e-05, |
|
"loss": 0.5717, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7244024259721726, |
|
"eval_loss": 0.5564510226249695, |
|
"eval_runtime": 273.2108, |
|
"eval_samples_per_second": 41.038, |
|
"eval_steps_per_second": 0.161, |
|
"step": 1481 |
|
}, |
|
{ |
|
"epoch": 15.09, |
|
"learning_rate": 2.739984882842026e-05, |
|
"loss": 0.5678, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 15.19, |
|
"learning_rate": 2.72108843537415e-05, |
|
"loss": 0.5653, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 2.7021919879062734e-05, |
|
"loss": 0.5599, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 2.6832955404383974e-05, |
|
"loss": 0.5745, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 2.6643990929705213e-05, |
|
"loss": 0.5701, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 15.59, |
|
"learning_rate": 2.6455026455026456e-05, |
|
"loss": 0.5715, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 15.7, |
|
"learning_rate": 2.6266061980347696e-05, |
|
"loss": 0.5677, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 15.8, |
|
"learning_rate": 2.6077097505668935e-05, |
|
"loss": 0.5749, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 2.5888133030990175e-05, |
|
"loss": 0.5741, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.5699168556311414e-05, |
|
"loss": 0.5646, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7302889760970389, |
|
"eval_loss": 0.5536319017410278, |
|
"eval_runtime": 270.439, |
|
"eval_samples_per_second": 41.459, |
|
"eval_steps_per_second": 0.163, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 2.5510204081632654e-05, |
|
"loss": 0.5717, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 16.2, |
|
"learning_rate": 2.5321239606953894e-05, |
|
"loss": 0.565, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 2.5132275132275137e-05, |
|
"loss": 0.5749, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 16.41, |
|
"learning_rate": 2.4943310657596373e-05, |
|
"loss": 0.5701, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 16.51, |
|
"learning_rate": 2.4754346182917612e-05, |
|
"loss": 0.5717, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 16.61, |
|
"learning_rate": 2.4565381708238852e-05, |
|
"loss": 0.5641, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 16.71, |
|
"learning_rate": 2.437641723356009e-05, |
|
"loss": 0.5663, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 16.81, |
|
"learning_rate": 2.418745275888133e-05, |
|
"loss": 0.5674, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 16.91, |
|
"learning_rate": 2.399848828420257e-05, |
|
"loss": 0.5591, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.7344809133071709, |
|
"eval_loss": 0.5525398254394531, |
|
"eval_runtime": 269.9721, |
|
"eval_samples_per_second": 41.53, |
|
"eval_steps_per_second": 0.163, |
|
"step": 1678 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 0.5754, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 17.11, |
|
"learning_rate": 2.362055933484505e-05, |
|
"loss": 0.5734, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 17.22, |
|
"learning_rate": 2.343159486016629e-05, |
|
"loss": 0.5671, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 17.32, |
|
"learning_rate": 2.324263038548753e-05, |
|
"loss": 0.5663, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 17.42, |
|
"learning_rate": 2.305366591080877e-05, |
|
"loss": 0.5707, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 17.52, |
|
"learning_rate": 2.2864701436130008e-05, |
|
"loss": 0.5635, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 17.62, |
|
"learning_rate": 2.267573696145125e-05, |
|
"loss": 0.569, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 17.72, |
|
"learning_rate": 2.2486772486772487e-05, |
|
"loss": 0.5675, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 17.82, |
|
"learning_rate": 2.2297808012093727e-05, |
|
"loss": 0.5623, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 17.92, |
|
"learning_rate": 2.2108843537414966e-05, |
|
"loss": 0.5586, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_accuracy": 0.7285943631823046, |
|
"eval_loss": 0.5565158724784851, |
|
"eval_runtime": 267.0513, |
|
"eval_samples_per_second": 41.984, |
|
"eval_steps_per_second": 0.165, |
|
"step": 1777 |
|
}, |
|
{ |
|
"epoch": 18.03, |
|
"learning_rate": 2.1919879062736206e-05, |
|
"loss": 0.5606, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 18.13, |
|
"learning_rate": 2.1730914588057446e-05, |
|
"loss": 0.5611, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 18.23, |
|
"learning_rate": 2.1541950113378685e-05, |
|
"loss": 0.5693, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 18.33, |
|
"learning_rate": 2.1352985638699928e-05, |
|
"loss": 0.559, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 18.43, |
|
"learning_rate": 2.1164021164021164e-05, |
|
"loss": 0.5673, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 2.0975056689342404e-05, |
|
"loss": 0.5635, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 18.63, |
|
"learning_rate": 2.0786092214663643e-05, |
|
"loss": 0.567, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"learning_rate": 2.0597127739984883e-05, |
|
"loss": 0.5637, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 18.84, |
|
"learning_rate": 2.0408163265306123e-05, |
|
"loss": 0.5648, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 18.94, |
|
"learning_rate": 2.0219198790627365e-05, |
|
"loss": 0.5668, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7303781662504459, |
|
"eval_loss": 0.5519587993621826, |
|
"eval_runtime": 272.9351, |
|
"eval_samples_per_second": 41.079, |
|
"eval_steps_per_second": 0.161, |
|
"step": 1876 |
|
}, |
|
{ |
|
"epoch": 19.04, |
|
"learning_rate": 2.0030234315948605e-05, |
|
"loss": 0.5628, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 19.14, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 0.5642, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 19.24, |
|
"learning_rate": 1.965230536659108e-05, |
|
"loss": 0.5563, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 19.34, |
|
"learning_rate": 1.946334089191232e-05, |
|
"loss": 0.5642, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 19.44, |
|
"learning_rate": 1.927437641723356e-05, |
|
"loss": 0.5618, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 19.54, |
|
"learning_rate": 1.90854119425548e-05, |
|
"loss": 0.5578, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 19.65, |
|
"learning_rate": 1.8896447467876043e-05, |
|
"loss": 0.5636, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 19.75, |
|
"learning_rate": 1.8707482993197282e-05, |
|
"loss": 0.5749, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 19.85, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.5628, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 19.95, |
|
"learning_rate": 1.8329554043839758e-05, |
|
"loss": 0.5617, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7288619336425258, |
|
"eval_loss": 0.5557389855384827, |
|
"eval_runtime": 270.9737, |
|
"eval_samples_per_second": 41.377, |
|
"eval_steps_per_second": 0.162, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 20.05, |
|
"learning_rate": 1.8140589569160997e-05, |
|
"loss": 0.5541, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 20.15, |
|
"learning_rate": 1.7951625094482237e-05, |
|
"loss": 0.5693, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 20.25, |
|
"learning_rate": 1.776266061980348e-05, |
|
"loss": 0.5652, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 20.35, |
|
"learning_rate": 1.757369614512472e-05, |
|
"loss": 0.5579, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 20.46, |
|
"learning_rate": 1.7384731670445956e-05, |
|
"loss": 0.5597, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 20.56, |
|
"learning_rate": 1.7195767195767195e-05, |
|
"loss": 0.5674, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 20.66, |
|
"learning_rate": 1.7006802721088435e-05, |
|
"loss": 0.5634, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 20.76, |
|
"learning_rate": 1.6817838246409674e-05, |
|
"loss": 0.567, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 20.86, |
|
"learning_rate": 1.6628873771730914e-05, |
|
"loss": 0.5571, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 20.96, |
|
"learning_rate": 1.6439909297052157e-05, |
|
"loss": 0.5546, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.7325187299322155, |
|
"eval_loss": 0.5561436414718628, |
|
"eval_runtime": 269.5801, |
|
"eval_samples_per_second": 41.591, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2073 |
|
}, |
|
{ |
|
"epoch": 21.06, |
|
"learning_rate": 1.6250944822373397e-05, |
|
"loss": 0.5589, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 21.16, |
|
"learning_rate": 1.6061980347694633e-05, |
|
"loss": 0.5658, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 21.27, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 0.5596, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 21.37, |
|
"learning_rate": 1.5684051398337112e-05, |
|
"loss": 0.5618, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 21.47, |
|
"learning_rate": 1.549508692365835e-05, |
|
"loss": 0.5624, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 21.57, |
|
"learning_rate": 1.5306122448979594e-05, |
|
"loss": 0.5619, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 21.67, |
|
"learning_rate": 1.5117157974300832e-05, |
|
"loss": 0.5554, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 21.77, |
|
"learning_rate": 1.4928193499622074e-05, |
|
"loss": 0.5539, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 21.87, |
|
"learning_rate": 1.473922902494331e-05, |
|
"loss": 0.5657, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 21.97, |
|
"learning_rate": 1.455026455026455e-05, |
|
"loss": 0.5579, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 21.99, |
|
"eval_accuracy": 0.7314484480913307, |
|
"eval_loss": 0.5537222623825073, |
|
"eval_runtime": 267.4411, |
|
"eval_samples_per_second": 41.923, |
|
"eval_steps_per_second": 0.165, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 22.08, |
|
"learning_rate": 1.436130007558579e-05, |
|
"loss": 0.554, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 22.18, |
|
"learning_rate": 1.417233560090703e-05, |
|
"loss": 0.569, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 22.28, |
|
"learning_rate": 1.398337112622827e-05, |
|
"loss": 0.5528, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 22.38, |
|
"learning_rate": 1.379440665154951e-05, |
|
"loss": 0.5649, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 22.48, |
|
"learning_rate": 1.360544217687075e-05, |
|
"loss": 0.5586, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 22.58, |
|
"learning_rate": 1.3416477702191987e-05, |
|
"loss": 0.5606, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 22.68, |
|
"learning_rate": 1.3227513227513228e-05, |
|
"loss": 0.5551, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 22.78, |
|
"learning_rate": 1.3038548752834468e-05, |
|
"loss": 0.5588, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 22.89, |
|
"learning_rate": 1.2849584278155707e-05, |
|
"loss": 0.5591, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"learning_rate": 1.2660619803476947e-05, |
|
"loss": 0.5604, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.72904031394934, |
|
"eval_loss": 0.5545207262039185, |
|
"eval_runtime": 270.2678, |
|
"eval_samples_per_second": 41.485, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2271 |
|
}, |
|
{ |
|
"epoch": 23.09, |
|
"learning_rate": 1.2471655328798186e-05, |
|
"loss": 0.5588, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 23.19, |
|
"learning_rate": 1.2282690854119426e-05, |
|
"loss": 0.5499, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 23.29, |
|
"learning_rate": 1.2093726379440666e-05, |
|
"loss": 0.5495, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 23.39, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 0.5571, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 23.49, |
|
"learning_rate": 1.1715797430083145e-05, |
|
"loss": 0.5545, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 23.59, |
|
"learning_rate": 1.1526832955404384e-05, |
|
"loss": 0.5625, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 23.7, |
|
"learning_rate": 1.1337868480725626e-05, |
|
"loss": 0.5573, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 23.8, |
|
"learning_rate": 1.1148904006046863e-05, |
|
"loss": 0.5623, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 23.9, |
|
"learning_rate": 1.0959939531368103e-05, |
|
"loss": 0.5527, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 1.0770975056689343e-05, |
|
"loss": 0.5563, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7287727434891188, |
|
"eval_loss": 0.5590741634368896, |
|
"eval_runtime": 269.4039, |
|
"eval_samples_per_second": 41.618, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 24.1, |
|
"learning_rate": 1.0582010582010582e-05, |
|
"loss": 0.5586, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 24.2, |
|
"learning_rate": 1.0393046107331822e-05, |
|
"loss": 0.5424, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 24.3, |
|
"learning_rate": 1.0204081632653061e-05, |
|
"loss": 0.5598, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 24.41, |
|
"learning_rate": 1.0015117157974303e-05, |
|
"loss": 0.5528, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 24.51, |
|
"learning_rate": 9.82615268329554e-06, |
|
"loss": 0.5521, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 24.61, |
|
"learning_rate": 9.63718820861678e-06, |
|
"loss": 0.5524, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 24.71, |
|
"learning_rate": 9.448223733938021e-06, |
|
"loss": 0.5602, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 24.81, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.5608, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 24.91, |
|
"learning_rate": 9.070294784580499e-06, |
|
"loss": 0.5634, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.7307349268640742, |
|
"eval_loss": 0.5545657277107239, |
|
"eval_runtime": 273.5576, |
|
"eval_samples_per_second": 40.986, |
|
"eval_steps_per_second": 0.161, |
|
"step": 2468 |
|
}, |
|
{ |
|
"epoch": 25.01, |
|
"learning_rate": 8.88133030990174e-06, |
|
"loss": 0.5516, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 25.11, |
|
"learning_rate": 8.692365835222978e-06, |
|
"loss": 0.5489, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 25.22, |
|
"learning_rate": 8.503401360544217e-06, |
|
"loss": 0.5562, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 25.32, |
|
"learning_rate": 8.314436885865457e-06, |
|
"loss": 0.5498, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 8.125472411186698e-06, |
|
"loss": 0.5578, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 25.52, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 0.551, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 25.62, |
|
"learning_rate": 7.747543461829176e-06, |
|
"loss": 0.5572, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 25.72, |
|
"learning_rate": 7.558578987150416e-06, |
|
"loss": 0.5523, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 25.82, |
|
"learning_rate": 7.369614512471655e-06, |
|
"loss": 0.5525, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 25.92, |
|
"learning_rate": 7.180650037792895e-06, |
|
"loss": 0.5563, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 25.99, |
|
"eval_accuracy": 0.7302889760970389, |
|
"eval_loss": 0.5556601285934448, |
|
"eval_runtime": 270.2761, |
|
"eval_samples_per_second": 41.484, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2567 |
|
}, |
|
{ |
|
"epoch": 26.03, |
|
"learning_rate": 6.991685563114135e-06, |
|
"loss": 0.5607, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 26.13, |
|
"learning_rate": 6.802721088435375e-06, |
|
"loss": 0.5555, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 26.23, |
|
"learning_rate": 6.613756613756614e-06, |
|
"loss": 0.5527, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 26.33, |
|
"learning_rate": 6.424792139077854e-06, |
|
"loss": 0.5549, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 26.43, |
|
"learning_rate": 6.235827664399093e-06, |
|
"loss": 0.5435, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 26.53, |
|
"learning_rate": 6.046863189720333e-06, |
|
"loss": 0.5511, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 26.63, |
|
"learning_rate": 5.857898715041572e-06, |
|
"loss": 0.5629, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 26.73, |
|
"learning_rate": 5.668934240362813e-06, |
|
"loss": 0.5527, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 26.84, |
|
"learning_rate": 5.4799697656840515e-06, |
|
"loss": 0.5541, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 26.94, |
|
"learning_rate": 5.291005291005291e-06, |
|
"loss": 0.5563, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.727613271494827, |
|
"eval_loss": 0.5571199655532837, |
|
"eval_runtime": 269.8955, |
|
"eval_samples_per_second": 41.542, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2666 |
|
}, |
|
{ |
|
"epoch": 27.04, |
|
"learning_rate": 5.102040816326531e-06, |
|
"loss": 0.553, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 27.14, |
|
"learning_rate": 4.91307634164777e-06, |
|
"loss": 0.5502, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 27.24, |
|
"learning_rate": 4.724111866969011e-06, |
|
"loss": 0.55, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 27.34, |
|
"learning_rate": 4.535147392290249e-06, |
|
"loss": 0.5527, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 27.44, |
|
"learning_rate": 4.346182917611489e-06, |
|
"loss": 0.5462, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"learning_rate": 4.1572184429327285e-06, |
|
"loss": 0.5502, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 27.65, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 0.5533, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 27.75, |
|
"learning_rate": 3.779289493575208e-06, |
|
"loss": 0.5585, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 27.85, |
|
"learning_rate": 3.5903250188964477e-06, |
|
"loss": 0.556, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 27.95, |
|
"learning_rate": 3.4013605442176877e-06, |
|
"loss": 0.5544, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7298430253300036, |
|
"eval_loss": 0.5550753474235535, |
|
"eval_runtime": 273.4563, |
|
"eval_samples_per_second": 41.001, |
|
"eval_steps_per_second": 0.161, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 28.05, |
|
"learning_rate": 3.212396069538927e-06, |
|
"loss": 0.553, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 28.15, |
|
"learning_rate": 3.0234315948601664e-06, |
|
"loss": 0.5534, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 28.25, |
|
"learning_rate": 2.8344671201814064e-06, |
|
"loss": 0.5488, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 28.35, |
|
"learning_rate": 2.6455026455026455e-06, |
|
"loss": 0.5553, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 28.46, |
|
"learning_rate": 2.456538170823885e-06, |
|
"loss": 0.5503, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 28.56, |
|
"learning_rate": 2.2675736961451247e-06, |
|
"loss": 0.5493, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 28.66, |
|
"learning_rate": 2.0786092214663643e-06, |
|
"loss": 0.5474, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 28.76, |
|
"learning_rate": 1.889644746787604e-06, |
|
"loss": 0.5477, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 28.86, |
|
"learning_rate": 1.7006802721088438e-06, |
|
"loss": 0.5565, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 28.96, |
|
"learning_rate": 1.5117157974300832e-06, |
|
"loss": 0.5491, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.7282376025686764, |
|
"eval_loss": 0.5596103668212891, |
|
"eval_runtime": 270.7488, |
|
"eval_samples_per_second": 41.411, |
|
"eval_steps_per_second": 0.163, |
|
"step": 2863 |
|
}, |
|
{ |
|
"epoch": 29.06, |
|
"learning_rate": 1.3227513227513228e-06, |
|
"loss": 0.5508, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 29.16, |
|
"learning_rate": 1.1337868480725623e-06, |
|
"loss": 0.5513, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 29.27, |
|
"learning_rate": 9.44822373393802e-07, |
|
"loss": 0.5517, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 29.37, |
|
"learning_rate": 7.558578987150416e-07, |
|
"loss": 0.5575, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 29.47, |
|
"learning_rate": 5.668934240362812e-07, |
|
"loss": 0.5519, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 29.57, |
|
"learning_rate": 3.779289493575208e-07, |
|
"loss": 0.5549, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 29.67, |
|
"learning_rate": 1.889644746787604e-07, |
|
"loss": 0.5478, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 29.77, |
|
"learning_rate": 0.0, |
|
"loss": 0.5461, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 29.77, |
|
"eval_accuracy": 0.7302889760970389, |
|
"eval_loss": 0.5574254989624023, |
|
"eval_runtime": 267.0764, |
|
"eval_samples_per_second": 41.98, |
|
"eval_steps_per_second": 0.165, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 29.77, |
|
"step": 2940, |
|
"total_flos": 7.46776315809736e+19, |
|
"train_loss": 0.5755378539870385, |
|
"train_runtime": 78104.0618, |
|
"train_samples_per_second": 38.759, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"max_steps": 2940, |
|
"num_train_epochs": 30, |
|
"total_flos": 7.46776315809736e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|