{ "best_metric": 0.8251666013328106, "best_model_checkpoint": "videomae-large-cctv-brawl_extended_v1/checkpoint-12565", "epoch": 4.199681655391962, "eval_steps": 500, "global_step": 12565, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2.386634844868735e-09, "loss": 0.7489, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.77326968973747e-09, "loss": 0.724, "step": 20 }, { "epoch": 0.0, "learning_rate": 7.159904534606205e-09, "loss": 0.7649, "step": 30 }, { "epoch": 0.0, "learning_rate": 9.54653937947494e-09, "loss": 0.6728, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.1933174224343675e-08, "loss": 0.7368, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.431980906921241e-08, "loss": 0.7369, "step": 60 }, { "epoch": 0.01, "learning_rate": 1.6706443914081144e-08, "loss": 0.7463, "step": 70 }, { "epoch": 0.01, "learning_rate": 1.909307875894988e-08, "loss": 0.6903, "step": 80 }, { "epoch": 0.01, "learning_rate": 2.1479713603818616e-08, "loss": 0.725, "step": 90 }, { "epoch": 0.01, "learning_rate": 2.386634844868735e-08, "loss": 0.7142, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.6252983293556084e-08, "loss": 0.7131, "step": 110 }, { "epoch": 0.01, "learning_rate": 2.863961813842482e-08, "loss": 0.7178, "step": 120 }, { "epoch": 0.01, "learning_rate": 3.102625298329355e-08, "loss": 0.6786, "step": 130 }, { "epoch": 0.01, "learning_rate": 3.341288782816229e-08, "loss": 0.7279, "step": 140 }, { "epoch": 0.01, "learning_rate": 3.579952267303102e-08, "loss": 0.6817, "step": 150 }, { "epoch": 0.01, "learning_rate": 3.818615751789976e-08, "loss": 0.6895, "step": 160 }, { "epoch": 0.01, "learning_rate": 4.05727923627685e-08, "loss": 0.6714, "step": 170 }, { "epoch": 0.01, "learning_rate": 4.295942720763723e-08, "loss": 0.7969, "step": 180 }, { "epoch": 0.02, "learning_rate": 4.5346062052505965e-08, "loss": 0.6794, "step": 190 }, { "epoch": 0.02, "learning_rate": 4.77326968973747e-08, "loss": 0.7054, "step": 200 }, { "epoch": 0.02, "learning_rate": 5.0119331742243434e-08, "loss": 0.7599, "step": 210 }, { "epoch": 0.02, "learning_rate": 5.250596658711217e-08, "loss": 0.7148, "step": 220 }, { "epoch": 0.02, "learning_rate": 5.48926014319809e-08, "loss": 0.7105, "step": 230 }, { "epoch": 0.02, "learning_rate": 5.727923627684964e-08, "loss": 0.7208, "step": 240 }, { "epoch": 0.02, "learning_rate": 5.966587112171838e-08, "loss": 0.6222, "step": 250 }, { "epoch": 0.02, "learning_rate": 6.20525059665871e-08, "loss": 0.6742, "step": 260 }, { "epoch": 0.02, "learning_rate": 6.443914081145585e-08, "loss": 0.661, "step": 270 }, { "epoch": 0.02, "learning_rate": 6.682577565632457e-08, "loss": 0.722, "step": 280 }, { "epoch": 0.02, "learning_rate": 6.921241050119332e-08, "loss": 0.7342, "step": 290 }, { "epoch": 0.02, "learning_rate": 7.159904534606204e-08, "loss": 0.6948, "step": 300 }, { "epoch": 0.02, "learning_rate": 7.398568019093078e-08, "loss": 0.7301, "step": 310 }, { "epoch": 0.03, "learning_rate": 7.637231503579952e-08, "loss": 0.6596, "step": 320 }, { "epoch": 0.03, "learning_rate": 7.875894988066825e-08, "loss": 0.6857, "step": 330 }, { "epoch": 0.03, "learning_rate": 8.1145584725537e-08, "loss": 0.7129, "step": 340 }, { "epoch": 0.03, "learning_rate": 8.353221957040572e-08, "loss": 0.6889, "step": 350 }, { "epoch": 0.03, "learning_rate": 8.591885441527446e-08, "loss": 0.6673, "step": 360 }, { "epoch": 0.03, "learning_rate": 8.830548926014319e-08, "loss": 0.6823, "step": 370 }, { "epoch": 0.03, "learning_rate": 9.069212410501193e-08, "loss": 0.6655, "step": 380 }, { "epoch": 0.03, "learning_rate": 9.307875894988066e-08, "loss": 0.6166, "step": 390 }, { "epoch": 0.03, "learning_rate": 9.54653937947494e-08, "loss": 0.6371, "step": 400 }, { "epoch": 0.03, "learning_rate": 9.785202863961813e-08, "loss": 0.704, "step": 410 }, { "epoch": 0.03, "learning_rate": 1.0023866348448687e-07, "loss": 0.7303, "step": 420 }, { "epoch": 0.03, "learning_rate": 1.0262529832935561e-07, "loss": 0.709, "step": 430 }, { "epoch": 0.04, "learning_rate": 1.0501193317422434e-07, "loss": 0.6405, "step": 440 }, { "epoch": 0.04, "learning_rate": 1.0739856801909308e-07, "loss": 0.6122, "step": 450 }, { "epoch": 0.04, "learning_rate": 1.097852028639618e-07, "loss": 0.6458, "step": 460 }, { "epoch": 0.04, "learning_rate": 1.1217183770883055e-07, "loss": 0.6574, "step": 470 }, { "epoch": 0.04, "learning_rate": 1.1455847255369927e-07, "loss": 0.6348, "step": 480 }, { "epoch": 0.04, "learning_rate": 1.1694510739856802e-07, "loss": 0.6998, "step": 490 }, { "epoch": 0.04, "learning_rate": 1.1933174224343676e-07, "loss": 0.6245, "step": 500 }, { "epoch": 0.04, "learning_rate": 1.2171837708830548e-07, "loss": 0.6499, "step": 510 }, { "epoch": 0.04, "learning_rate": 1.241050119331742e-07, "loss": 0.5856, "step": 520 }, { "epoch": 0.04, "learning_rate": 1.2649164677804294e-07, "loss": 0.7143, "step": 530 }, { "epoch": 0.04, "learning_rate": 1.288782816229117e-07, "loss": 0.6285, "step": 540 }, { "epoch": 0.04, "learning_rate": 1.3126491646778042e-07, "loss": 0.6485, "step": 550 }, { "epoch": 0.04, "learning_rate": 1.3365155131264915e-07, "loss": 0.6322, "step": 560 }, { "epoch": 0.05, "learning_rate": 1.3603818615751788e-07, "loss": 0.6228, "step": 570 }, { "epoch": 0.05, "learning_rate": 1.3842482100238663e-07, "loss": 0.6469, "step": 580 }, { "epoch": 0.05, "learning_rate": 1.4081145584725536e-07, "loss": 0.6374, "step": 590 }, { "epoch": 0.05, "learning_rate": 1.4319809069212409e-07, "loss": 0.6418, "step": 600 }, { "epoch": 0.05, "learning_rate": 1.4558472553699284e-07, "loss": 0.6212, "step": 610 }, { "epoch": 0.05, "learning_rate": 1.4797136038186157e-07, "loss": 0.66, "step": 620 }, { "epoch": 0.05, "learning_rate": 1.503579952267303e-07, "loss": 0.5956, "step": 630 }, { "epoch": 0.05, "learning_rate": 1.5274463007159905e-07, "loss": 0.6426, "step": 640 }, { "epoch": 0.05, "learning_rate": 1.5513126491646775e-07, "loss": 0.6602, "step": 650 }, { "epoch": 0.05, "learning_rate": 1.575178997613365e-07, "loss": 0.633, "step": 660 }, { "epoch": 0.05, "learning_rate": 1.5990453460620523e-07, "loss": 0.6016, "step": 670 }, { "epoch": 0.05, "learning_rate": 1.62291169451074e-07, "loss": 0.6235, "step": 680 }, { "epoch": 0.05, "learning_rate": 1.646778042959427e-07, "loss": 0.579, "step": 690 }, { "epoch": 0.06, "learning_rate": 1.6706443914081144e-07, "loss": 0.5734, "step": 700 }, { "epoch": 0.06, "learning_rate": 1.6945107398568017e-07, "loss": 0.5904, "step": 710 }, { "epoch": 0.06, "learning_rate": 1.7183770883054892e-07, "loss": 0.5944, "step": 720 }, { "epoch": 0.06, "learning_rate": 1.7422434367541765e-07, "loss": 0.6764, "step": 730 }, { "epoch": 0.06, "learning_rate": 1.7661097852028638e-07, "loss": 0.5815, "step": 740 }, { "epoch": 0.06, "learning_rate": 1.789976133651551e-07, "loss": 0.5883, "step": 750 }, { "epoch": 0.06, "learning_rate": 1.8138424821002386e-07, "loss": 0.6033, "step": 760 }, { "epoch": 0.06, "learning_rate": 1.837708830548926e-07, "loss": 0.5302, "step": 770 }, { "epoch": 0.06, "learning_rate": 1.8615751789976132e-07, "loss": 0.5989, "step": 780 }, { "epoch": 0.06, "learning_rate": 1.8854415274463004e-07, "loss": 0.5534, "step": 790 }, { "epoch": 0.06, "learning_rate": 1.909307875894988e-07, "loss": 0.6403, "step": 800 }, { "epoch": 0.06, "learning_rate": 1.9331742243436753e-07, "loss": 0.4963, "step": 810 }, { "epoch": 0.07, "learning_rate": 1.9570405727923625e-07, "loss": 0.653, "step": 820 }, { "epoch": 0.07, "learning_rate": 1.98090692124105e-07, "loss": 0.606, "step": 830 }, { "epoch": 0.07, "learning_rate": 2.0047732696897374e-07, "loss": 0.6209, "step": 840 }, { "epoch": 0.07, "learning_rate": 2.028639618138425e-07, "loss": 0.5385, "step": 850 }, { "epoch": 0.07, "learning_rate": 2.0525059665871122e-07, "loss": 0.557, "step": 860 }, { "epoch": 0.07, "learning_rate": 2.0763723150357995e-07, "loss": 0.6703, "step": 870 }, { "epoch": 0.07, "learning_rate": 2.1002386634844867e-07, "loss": 0.5217, "step": 880 }, { "epoch": 0.07, "learning_rate": 2.1241050119331743e-07, "loss": 0.5697, "step": 890 }, { "epoch": 0.07, "learning_rate": 2.1479713603818616e-07, "loss": 0.5365, "step": 900 }, { "epoch": 0.07, "learning_rate": 2.1718377088305488e-07, "loss": 0.56, "step": 910 }, { "epoch": 0.07, "learning_rate": 2.195704057279236e-07, "loss": 0.4775, "step": 920 }, { "epoch": 0.07, "learning_rate": 2.2195704057279237e-07, "loss": 0.5328, "step": 930 }, { "epoch": 0.07, "learning_rate": 2.243436754176611e-07, "loss": 0.6135, "step": 940 }, { "epoch": 0.08, "learning_rate": 2.2673031026252982e-07, "loss": 0.5732, "step": 950 }, { "epoch": 0.08, "learning_rate": 2.2911694510739855e-07, "loss": 0.5636, "step": 960 }, { "epoch": 0.08, "learning_rate": 2.315035799522673e-07, "loss": 0.6081, "step": 970 }, { "epoch": 0.08, "learning_rate": 2.3389021479713603e-07, "loss": 0.619, "step": 980 }, { "epoch": 0.08, "learning_rate": 2.3627684964200478e-07, "loss": 0.5916, "step": 990 }, { "epoch": 0.08, "learning_rate": 2.386634844868735e-07, "loss": 0.6113, "step": 1000 }, { "epoch": 0.08, "learning_rate": 2.4105011933174227e-07, "loss": 0.6695, "step": 1010 }, { "epoch": 0.08, "learning_rate": 2.4343675417661097e-07, "loss": 0.559, "step": 1020 }, { "epoch": 0.08, "learning_rate": 2.458233890214797e-07, "loss": 0.5641, "step": 1030 }, { "epoch": 0.08, "learning_rate": 2.482100238663484e-07, "loss": 0.5845, "step": 1040 }, { "epoch": 0.08, "learning_rate": 2.505966587112172e-07, "loss": 0.6465, "step": 1050 }, { "epoch": 0.08, "learning_rate": 2.529832935560859e-07, "loss": 0.5213, "step": 1060 }, { "epoch": 0.09, "learning_rate": 2.5536992840095463e-07, "loss": 0.5663, "step": 1070 }, { "epoch": 0.09, "learning_rate": 2.577565632458234e-07, "loss": 0.6348, "step": 1080 }, { "epoch": 0.09, "learning_rate": 2.6014319809069214e-07, "loss": 0.5389, "step": 1090 }, { "epoch": 0.09, "learning_rate": 2.6252983293556084e-07, "loss": 0.6299, "step": 1100 }, { "epoch": 0.09, "learning_rate": 2.649164677804296e-07, "loss": 0.5185, "step": 1110 }, { "epoch": 0.09, "learning_rate": 2.673031026252983e-07, "loss": 0.4967, "step": 1120 }, { "epoch": 0.09, "learning_rate": 2.6968973747016705e-07, "loss": 0.5323, "step": 1130 }, { "epoch": 0.09, "learning_rate": 2.7207637231503575e-07, "loss": 0.4797, "step": 1140 }, { "epoch": 0.09, "learning_rate": 2.744630071599045e-07, "loss": 0.591, "step": 1150 }, { "epoch": 0.09, "learning_rate": 2.7684964200477326e-07, "loss": 0.488, "step": 1160 }, { "epoch": 0.09, "learning_rate": 2.79236276849642e-07, "loss": 0.4519, "step": 1170 }, { "epoch": 0.09, "learning_rate": 2.816229116945107e-07, "loss": 0.5839, "step": 1180 }, { "epoch": 0.09, "learning_rate": 2.8400954653937947e-07, "loss": 0.5682, "step": 1190 }, { "epoch": 0.1, "learning_rate": 2.8639618138424817e-07, "loss": 0.4765, "step": 1200 }, { "epoch": 0.1, "learning_rate": 2.8878281622911693e-07, "loss": 0.6187, "step": 1210 }, { "epoch": 0.1, "learning_rate": 2.911694510739857e-07, "loss": 0.4894, "step": 1220 }, { "epoch": 0.1, "learning_rate": 2.935560859188544e-07, "loss": 0.5424, "step": 1230 }, { "epoch": 0.1, "learning_rate": 2.9594272076372314e-07, "loss": 0.5351, "step": 1240 }, { "epoch": 0.1, "learning_rate": 2.983293556085919e-07, "loss": 0.5059, "step": 1250 }, { "epoch": 0.1, "learning_rate": 2.999204103289706e-07, "loss": 0.5178, "step": 1260 }, { "epoch": 0.1, "learning_rate": 2.996551114255394e-07, "loss": 0.3589, "step": 1270 }, { "epoch": 0.1, "learning_rate": 2.9938981252210824e-07, "loss": 0.5448, "step": 1280 }, { "epoch": 0.1, "learning_rate": 2.9912451361867705e-07, "loss": 0.4891, "step": 1290 }, { "epoch": 0.1, "learning_rate": 2.988592147152458e-07, "loss": 0.5095, "step": 1300 }, { "epoch": 0.1, "learning_rate": 2.9859391581181463e-07, "loss": 0.5336, "step": 1310 }, { "epoch": 0.11, "learning_rate": 2.9832861690838345e-07, "loss": 0.5037, "step": 1320 }, { "epoch": 0.11, "learning_rate": 2.980633180049522e-07, "loss": 0.4463, "step": 1330 }, { "epoch": 0.11, "learning_rate": 2.97798019101521e-07, "loss": 0.4545, "step": 1340 }, { "epoch": 0.11, "learning_rate": 2.9753272019808984e-07, "loss": 0.5409, "step": 1350 }, { "epoch": 0.11, "learning_rate": 2.9726742129465865e-07, "loss": 0.408, "step": 1360 }, { "epoch": 0.11, "learning_rate": 2.9700212239122747e-07, "loss": 0.5175, "step": 1370 }, { "epoch": 0.11, "learning_rate": 2.9673682348779623e-07, "loss": 0.4775, "step": 1380 }, { "epoch": 0.11, "learning_rate": 2.9647152458436504e-07, "loss": 0.6129, "step": 1390 }, { "epoch": 0.11, "learning_rate": 2.9620622568093386e-07, "loss": 0.366, "step": 1400 }, { "epoch": 0.11, "learning_rate": 2.959409267775026e-07, "loss": 0.4697, "step": 1410 }, { "epoch": 0.11, "learning_rate": 2.9567562787407143e-07, "loss": 0.4706, "step": 1420 }, { "epoch": 0.11, "learning_rate": 2.9541032897064025e-07, "loss": 0.4555, "step": 1430 }, { "epoch": 0.11, "learning_rate": 2.9514503006720906e-07, "loss": 0.5475, "step": 1440 }, { "epoch": 0.12, "learning_rate": 2.948797311637778e-07, "loss": 0.4358, "step": 1450 }, { "epoch": 0.12, "learning_rate": 2.9461443226034664e-07, "loss": 0.5115, "step": 1460 }, { "epoch": 0.12, "learning_rate": 2.9434913335691545e-07, "loss": 0.6776, "step": 1470 }, { "epoch": 0.12, "learning_rate": 2.940838344534842e-07, "loss": 0.4034, "step": 1480 }, { "epoch": 0.12, "learning_rate": 2.938185355500531e-07, "loss": 0.5232, "step": 1490 }, { "epoch": 0.12, "learning_rate": 2.9355323664662185e-07, "loss": 0.5778, "step": 1500 }, { "epoch": 0.12, "learning_rate": 2.9328793774319066e-07, "loss": 0.5217, "step": 1510 }, { "epoch": 0.12, "learning_rate": 2.930226388397595e-07, "loss": 0.5378, "step": 1520 }, { "epoch": 0.12, "learning_rate": 2.9275733993632824e-07, "loss": 0.5131, "step": 1530 }, { "epoch": 0.12, "learning_rate": 2.9249204103289705e-07, "loss": 0.4615, "step": 1540 }, { "epoch": 0.12, "learning_rate": 2.9222674212946587e-07, "loss": 0.5084, "step": 1550 }, { "epoch": 0.12, "learning_rate": 2.9196144322603463e-07, "loss": 0.5639, "step": 1560 }, { "epoch": 0.12, "learning_rate": 2.9169614432260344e-07, "loss": 0.6176, "step": 1570 }, { "epoch": 0.13, "learning_rate": 2.9143084541917226e-07, "loss": 0.4819, "step": 1580 }, { "epoch": 0.13, "learning_rate": 2.9116554651574107e-07, "loss": 0.6385, "step": 1590 }, { "epoch": 0.13, "learning_rate": 2.9090024761230983e-07, "loss": 0.5515, "step": 1600 }, { "epoch": 0.13, "learning_rate": 2.9063494870887865e-07, "loss": 0.5406, "step": 1610 }, { "epoch": 0.13, "learning_rate": 2.9036964980544746e-07, "loss": 0.5813, "step": 1620 }, { "epoch": 0.13, "learning_rate": 2.901043509020162e-07, "loss": 0.6192, "step": 1630 }, { "epoch": 0.13, "learning_rate": 2.898390519985851e-07, "loss": 0.5759, "step": 1640 }, { "epoch": 0.13, "learning_rate": 2.8957375309515386e-07, "loss": 0.4963, "step": 1650 }, { "epoch": 0.13, "learning_rate": 2.8930845419172267e-07, "loss": 0.5021, "step": 1660 }, { "epoch": 0.13, "learning_rate": 2.890431552882915e-07, "loss": 0.5574, "step": 1670 }, { "epoch": 0.13, "learning_rate": 2.8877785638486025e-07, "loss": 0.4434, "step": 1680 }, { "epoch": 0.13, "learning_rate": 2.8851255748142906e-07, "loss": 0.5213, "step": 1690 }, { "epoch": 0.14, "learning_rate": 2.882472585779979e-07, "loss": 0.461, "step": 1700 }, { "epoch": 0.14, "learning_rate": 2.879819596745667e-07, "loss": 0.4587, "step": 1710 }, { "epoch": 0.14, "learning_rate": 2.8771666077113545e-07, "loss": 0.3879, "step": 1720 }, { "epoch": 0.14, "learning_rate": 2.8745136186770427e-07, "loss": 0.48, "step": 1730 }, { "epoch": 0.14, "learning_rate": 2.871860629642731e-07, "loss": 0.6378, "step": 1740 }, { "epoch": 0.14, "learning_rate": 2.8692076406084184e-07, "loss": 0.5548, "step": 1750 }, { "epoch": 0.14, "learning_rate": 2.8665546515741066e-07, "loss": 0.4741, "step": 1760 }, { "epoch": 0.14, "learning_rate": 2.8639016625397947e-07, "loss": 0.4441, "step": 1770 }, { "epoch": 0.14, "learning_rate": 2.8612486735054823e-07, "loss": 0.4345, "step": 1780 }, { "epoch": 0.14, "learning_rate": 2.858595684471171e-07, "loss": 0.6156, "step": 1790 }, { "epoch": 0.14, "learning_rate": 2.8559426954368586e-07, "loss": 0.4104, "step": 1800 }, { "epoch": 0.14, "learning_rate": 2.853289706402547e-07, "loss": 0.5361, "step": 1810 }, { "epoch": 0.14, "learning_rate": 2.850636717368235e-07, "loss": 0.4971, "step": 1820 }, { "epoch": 0.15, "learning_rate": 2.8479837283339226e-07, "loss": 0.4066, "step": 1830 }, { "epoch": 0.15, "learning_rate": 2.8453307392996107e-07, "loss": 0.4786, "step": 1840 }, { "epoch": 0.15, "learning_rate": 2.842677750265299e-07, "loss": 0.4475, "step": 1850 }, { "epoch": 0.15, "learning_rate": 2.840024761230987e-07, "loss": 0.596, "step": 1860 }, { "epoch": 0.15, "learning_rate": 2.8373717721966746e-07, "loss": 0.4797, "step": 1870 }, { "epoch": 0.15, "learning_rate": 2.834718783162363e-07, "loss": 0.6381, "step": 1880 }, { "epoch": 0.15, "learning_rate": 2.832065794128051e-07, "loss": 0.5512, "step": 1890 }, { "epoch": 0.15, "learning_rate": 2.8294128050937385e-07, "loss": 0.526, "step": 1900 }, { "epoch": 0.15, "learning_rate": 2.8267598160594267e-07, "loss": 0.4208, "step": 1910 }, { "epoch": 0.15, "learning_rate": 2.824106827025115e-07, "loss": 0.5019, "step": 1920 }, { "epoch": 0.15, "learning_rate": 2.821453837990803e-07, "loss": 0.5601, "step": 1930 }, { "epoch": 0.15, "learning_rate": 2.818800848956491e-07, "loss": 0.4532, "step": 1940 }, { "epoch": 0.16, "learning_rate": 2.816147859922179e-07, "loss": 0.6079, "step": 1950 }, { "epoch": 0.16, "learning_rate": 2.813494870887867e-07, "loss": 0.4444, "step": 1960 }, { "epoch": 0.16, "learning_rate": 2.810841881853555e-07, "loss": 0.5132, "step": 1970 }, { "epoch": 0.16, "learning_rate": 2.8081888928192426e-07, "loss": 0.5627, "step": 1980 }, { "epoch": 0.16, "learning_rate": 2.805535903784931e-07, "loss": 0.4318, "step": 1990 }, { "epoch": 0.16, "learning_rate": 2.802882914750619e-07, "loss": 0.5026, "step": 2000 }, { "epoch": 0.16, "learning_rate": 2.800229925716307e-07, "loss": 0.5187, "step": 2010 }, { "epoch": 0.16, "learning_rate": 2.7975769366819947e-07, "loss": 0.396, "step": 2020 }, { "epoch": 0.16, "learning_rate": 2.794923947647683e-07, "loss": 0.4171, "step": 2030 }, { "epoch": 0.16, "learning_rate": 2.792270958613371e-07, "loss": 0.4792, "step": 2040 }, { "epoch": 0.16, "learning_rate": 2.7896179695790586e-07, "loss": 0.4526, "step": 2050 }, { "epoch": 0.16, "learning_rate": 2.7869649805447473e-07, "loss": 0.4952, "step": 2060 }, { "epoch": 0.16, "learning_rate": 2.784311991510435e-07, "loss": 0.5462, "step": 2070 }, { "epoch": 0.17, "learning_rate": 2.781659002476123e-07, "loss": 0.6351, "step": 2080 }, { "epoch": 0.17, "learning_rate": 2.779006013441811e-07, "loss": 0.5329, "step": 2090 }, { "epoch": 0.17, "learning_rate": 2.776353024407499e-07, "loss": 0.3761, "step": 2100 }, { "epoch": 0.17, "learning_rate": 2.773700035373187e-07, "loss": 0.4625, "step": 2110 }, { "epoch": 0.17, "learning_rate": 2.771047046338875e-07, "loss": 0.5105, "step": 2120 }, { "epoch": 0.17, "learning_rate": 2.768394057304563e-07, "loss": 0.4063, "step": 2130 }, { "epoch": 0.17, "learning_rate": 2.765741068270251e-07, "loss": 0.4644, "step": 2140 }, { "epoch": 0.17, "learning_rate": 2.763088079235939e-07, "loss": 0.4825, "step": 2150 }, { "epoch": 0.17, "learning_rate": 2.760435090201627e-07, "loss": 0.5125, "step": 2160 }, { "epoch": 0.17, "learning_rate": 2.757782101167315e-07, "loss": 0.5761, "step": 2170 }, { "epoch": 0.17, "learning_rate": 2.755129112133003e-07, "loss": 0.4859, "step": 2180 }, { "epoch": 0.17, "learning_rate": 2.752476123098691e-07, "loss": 0.4388, "step": 2190 }, { "epoch": 0.18, "learning_rate": 2.7498231340643787e-07, "loss": 0.5507, "step": 2200 }, { "epoch": 0.18, "learning_rate": 2.7471701450300674e-07, "loss": 0.4343, "step": 2210 }, { "epoch": 0.18, "learning_rate": 2.744517155995755e-07, "loss": 0.4135, "step": 2220 }, { "epoch": 0.18, "learning_rate": 2.741864166961443e-07, "loss": 0.4108, "step": 2230 }, { "epoch": 0.18, "learning_rate": 2.7392111779271313e-07, "loss": 0.4274, "step": 2240 }, { "epoch": 0.18, "learning_rate": 2.736558188892819e-07, "loss": 0.4261, "step": 2250 }, { "epoch": 0.18, "learning_rate": 2.733905199858507e-07, "loss": 0.4734, "step": 2260 }, { "epoch": 0.18, "learning_rate": 2.731252210824195e-07, "loss": 0.5553, "step": 2270 }, { "epoch": 0.18, "learning_rate": 2.7285992217898834e-07, "loss": 0.4643, "step": 2280 }, { "epoch": 0.18, "learning_rate": 2.725946232755571e-07, "loss": 0.458, "step": 2290 }, { "epoch": 0.18, "learning_rate": 2.723293243721259e-07, "loss": 0.5908, "step": 2300 }, { "epoch": 0.18, "learning_rate": 2.7206402546869473e-07, "loss": 0.533, "step": 2310 }, { "epoch": 0.18, "learning_rate": 2.717987265652635e-07, "loss": 0.6152, "step": 2320 }, { "epoch": 0.19, "learning_rate": 2.715334276618323e-07, "loss": 0.3958, "step": 2330 }, { "epoch": 0.19, "learning_rate": 2.712681287584011e-07, "loss": 0.4243, "step": 2340 }, { "epoch": 0.19, "learning_rate": 2.710028298549699e-07, "loss": 0.4743, "step": 2350 }, { "epoch": 0.19, "learning_rate": 2.7073753095153875e-07, "loss": 0.603, "step": 2360 }, { "epoch": 0.19, "learning_rate": 2.704722320481075e-07, "loss": 0.4256, "step": 2370 }, { "epoch": 0.19, "learning_rate": 2.702069331446763e-07, "loss": 0.5295, "step": 2380 }, { "epoch": 0.19, "learning_rate": 2.6994163424124514e-07, "loss": 0.4739, "step": 2390 }, { "epoch": 0.19, "learning_rate": 2.696763353378139e-07, "loss": 0.3976, "step": 2400 }, { "epoch": 0.19, "learning_rate": 2.694110364343827e-07, "loss": 0.5386, "step": 2410 }, { "epoch": 0.19, "learning_rate": 2.6914573753095153e-07, "loss": 0.5893, "step": 2420 }, { "epoch": 0.19, "learning_rate": 2.6888043862752035e-07, "loss": 0.4234, "step": 2430 }, { "epoch": 0.19, "learning_rate": 2.686151397240891e-07, "loss": 0.411, "step": 2440 }, { "epoch": 0.19, "learning_rate": 2.683498408206579e-07, "loss": 0.5547, "step": 2450 }, { "epoch": 0.2, "learning_rate": 2.6808454191722674e-07, "loss": 0.4212, "step": 2460 }, { "epoch": 0.2, "learning_rate": 2.678192430137955e-07, "loss": 0.3612, "step": 2470 }, { "epoch": 0.2, "learning_rate": 2.675539441103643e-07, "loss": 0.7204, "step": 2480 }, { "epoch": 0.2, "learning_rate": 2.6728864520693313e-07, "loss": 0.5413, "step": 2490 }, { "epoch": 0.2, "learning_rate": 2.6702334630350194e-07, "loss": 0.4005, "step": 2500 }, { "epoch": 0.2, "learning_rate": 2.6675804740007076e-07, "loss": 0.4193, "step": 2510 }, { "epoch": 0.2, "eval_accuracy": 0.6985495883967072, "eval_loss": 0.48386532068252563, "eval_runtime": 707.7642, "eval_samples_per_second": 3.604, "eval_steps_per_second": 0.901, "step": 2514 }, { "epoch": 1.0, "learning_rate": 2.664927484966395e-07, "loss": 0.4189, "step": 2520 }, { "epoch": 1.0, "learning_rate": 2.6622744959320833e-07, "loss": 0.5329, "step": 2530 }, { "epoch": 1.0, "learning_rate": 2.6596215068977715e-07, "loss": 0.528, "step": 2540 }, { "epoch": 1.0, "learning_rate": 2.656968517863459e-07, "loss": 0.4498, "step": 2550 }, { "epoch": 1.0, "learning_rate": 2.654315528829147e-07, "loss": 0.4362, "step": 2560 }, { "epoch": 1.0, "learning_rate": 2.6516625397948354e-07, "loss": 0.4513, "step": 2570 }, { "epoch": 1.01, "learning_rate": 2.6490095507605236e-07, "loss": 0.45, "step": 2580 }, { "epoch": 1.01, "learning_rate": 2.6463565617262117e-07, "loss": 0.5109, "step": 2590 }, { "epoch": 1.01, "learning_rate": 2.6437035726918993e-07, "loss": 0.454, "step": 2600 }, { "epoch": 1.01, "learning_rate": 2.6410505836575875e-07, "loss": 0.5103, "step": 2610 }, { "epoch": 1.01, "learning_rate": 2.6383975946232756e-07, "loss": 0.521, "step": 2620 }, { "epoch": 1.01, "learning_rate": 2.635744605588964e-07, "loss": 0.5211, "step": 2630 }, { "epoch": 1.01, "learning_rate": 2.6330916165546514e-07, "loss": 0.485, "step": 2640 }, { "epoch": 1.01, "learning_rate": 2.6304386275203395e-07, "loss": 0.3995, "step": 2650 }, { "epoch": 1.01, "learning_rate": 2.6277856384860277e-07, "loss": 0.6925, "step": 2660 }, { "epoch": 1.01, "learning_rate": 2.6251326494517153e-07, "loss": 0.3811, "step": 2670 }, { "epoch": 1.01, "learning_rate": 2.6224796604174034e-07, "loss": 0.4942, "step": 2680 }, { "epoch": 1.01, "learning_rate": 2.6198266713830916e-07, "loss": 0.4295, "step": 2690 }, { "epoch": 1.01, "learning_rate": 2.617173682348779e-07, "loss": 0.6002, "step": 2700 }, { "epoch": 1.02, "learning_rate": 2.614520693314468e-07, "loss": 0.3757, "step": 2710 }, { "epoch": 1.02, "learning_rate": 2.6118677042801555e-07, "loss": 0.502, "step": 2720 }, { "epoch": 1.02, "learning_rate": 2.6092147152458436e-07, "loss": 0.2979, "step": 2730 }, { "epoch": 1.02, "learning_rate": 2.606561726211532e-07, "loss": 0.3475, "step": 2740 }, { "epoch": 1.02, "learning_rate": 2.6039087371772194e-07, "loss": 0.5197, "step": 2750 }, { "epoch": 1.02, "learning_rate": 2.6012557481429076e-07, "loss": 0.379, "step": 2760 }, { "epoch": 1.02, "learning_rate": 2.5986027591085957e-07, "loss": 0.4161, "step": 2770 }, { "epoch": 1.02, "learning_rate": 2.595949770074284e-07, "loss": 0.561, "step": 2780 }, { "epoch": 1.02, "learning_rate": 2.5932967810399715e-07, "loss": 0.4509, "step": 2790 }, { "epoch": 1.02, "learning_rate": 2.5906437920056596e-07, "loss": 0.3427, "step": 2800 }, { "epoch": 1.02, "learning_rate": 2.587990802971348e-07, "loss": 0.315, "step": 2810 }, { "epoch": 1.02, "learning_rate": 2.5853378139370354e-07, "loss": 0.5377, "step": 2820 }, { "epoch": 1.03, "learning_rate": 2.5826848249027235e-07, "loss": 0.4747, "step": 2830 }, { "epoch": 1.03, "learning_rate": 2.5800318358684117e-07, "loss": 0.604, "step": 2840 }, { "epoch": 1.03, "learning_rate": 2.5773788468341e-07, "loss": 0.6197, "step": 2850 }, { "epoch": 1.03, "learning_rate": 2.574725857799788e-07, "loss": 0.6246, "step": 2860 }, { "epoch": 1.03, "learning_rate": 2.5720728687654756e-07, "loss": 0.385, "step": 2870 }, { "epoch": 1.03, "learning_rate": 2.569419879731164e-07, "loss": 0.3885, "step": 2880 }, { "epoch": 1.03, "learning_rate": 2.566766890696852e-07, "loss": 0.3586, "step": 2890 }, { "epoch": 1.03, "learning_rate": 2.5641139016625395e-07, "loss": 0.4253, "step": 2900 }, { "epoch": 1.03, "learning_rate": 2.5614609126282276e-07, "loss": 0.3906, "step": 2910 }, { "epoch": 1.03, "learning_rate": 2.558807923593916e-07, "loss": 0.5617, "step": 2920 }, { "epoch": 1.03, "learning_rate": 2.556154934559604e-07, "loss": 0.5688, "step": 2930 }, { "epoch": 1.03, "learning_rate": 2.5535019455252916e-07, "loss": 0.5803, "step": 2940 }, { "epoch": 1.03, "learning_rate": 2.5508489564909797e-07, "loss": 0.4535, "step": 2950 }, { "epoch": 1.04, "learning_rate": 2.548195967456668e-07, "loss": 0.4301, "step": 2960 }, { "epoch": 1.04, "learning_rate": 2.5455429784223555e-07, "loss": 0.4844, "step": 2970 }, { "epoch": 1.04, "learning_rate": 2.542889989388044e-07, "loss": 0.4359, "step": 2980 }, { "epoch": 1.04, "learning_rate": 2.540237000353732e-07, "loss": 0.4562, "step": 2990 }, { "epoch": 1.04, "learning_rate": 2.53758401131942e-07, "loss": 0.3864, "step": 3000 }, { "epoch": 1.04, "learning_rate": 2.534931022285108e-07, "loss": 0.5046, "step": 3010 }, { "epoch": 1.04, "learning_rate": 2.5322780332507957e-07, "loss": 0.2887, "step": 3020 }, { "epoch": 1.04, "learning_rate": 2.529625044216484e-07, "loss": 0.4083, "step": 3030 }, { "epoch": 1.04, "learning_rate": 2.526972055182172e-07, "loss": 0.3877, "step": 3040 }, { "epoch": 1.04, "learning_rate": 2.5243190661478596e-07, "loss": 0.5078, "step": 3050 }, { "epoch": 1.04, "learning_rate": 2.521666077113548e-07, "loss": 0.4621, "step": 3060 }, { "epoch": 1.04, "learning_rate": 2.519013088079236e-07, "loss": 0.4411, "step": 3070 }, { "epoch": 1.05, "learning_rate": 2.516360099044924e-07, "loss": 0.5897, "step": 3080 }, { "epoch": 1.05, "learning_rate": 2.5137071100106117e-07, "loss": 0.4126, "step": 3090 }, { "epoch": 1.05, "learning_rate": 2.5110541209763e-07, "loss": 0.4725, "step": 3100 }, { "epoch": 1.05, "learning_rate": 2.508401131941988e-07, "loss": 0.4072, "step": 3110 }, { "epoch": 1.05, "learning_rate": 2.5057481429076756e-07, "loss": 0.5103, "step": 3120 }, { "epoch": 1.05, "learning_rate": 2.503095153873364e-07, "loss": 0.4725, "step": 3130 }, { "epoch": 1.05, "learning_rate": 2.500442164839052e-07, "loss": 0.4113, "step": 3140 }, { "epoch": 1.05, "learning_rate": 2.49778917580474e-07, "loss": 0.3694, "step": 3150 }, { "epoch": 1.05, "learning_rate": 2.495136186770428e-07, "loss": 0.4339, "step": 3160 }, { "epoch": 1.05, "learning_rate": 2.492483197736116e-07, "loss": 0.4924, "step": 3170 }, { "epoch": 1.05, "learning_rate": 2.489830208701804e-07, "loss": 0.4624, "step": 3180 }, { "epoch": 1.05, "learning_rate": 2.487177219667492e-07, "loss": 0.406, "step": 3190 }, { "epoch": 1.05, "learning_rate": 2.4845242306331797e-07, "loss": 0.4311, "step": 3200 }, { "epoch": 1.06, "learning_rate": 2.481871241598868e-07, "loss": 0.3505, "step": 3210 }, { "epoch": 1.06, "learning_rate": 2.479218252564556e-07, "loss": 0.5599, "step": 3220 }, { "epoch": 1.06, "learning_rate": 2.476565263530244e-07, "loss": 0.4658, "step": 3230 }, { "epoch": 1.06, "learning_rate": 2.473912274495932e-07, "loss": 0.3503, "step": 3240 }, { "epoch": 1.06, "learning_rate": 2.47125928546162e-07, "loss": 0.4346, "step": 3250 }, { "epoch": 1.06, "learning_rate": 2.468606296427308e-07, "loss": 0.4888, "step": 3260 }, { "epoch": 1.06, "learning_rate": 2.4659533073929957e-07, "loss": 0.4938, "step": 3270 }, { "epoch": 1.06, "learning_rate": 2.4633003183586843e-07, "loss": 0.3205, "step": 3280 }, { "epoch": 1.06, "learning_rate": 2.460647329324372e-07, "loss": 0.3941, "step": 3290 }, { "epoch": 1.06, "learning_rate": 2.45799434029006e-07, "loss": 0.447, "step": 3300 }, { "epoch": 1.06, "learning_rate": 2.455341351255748e-07, "loss": 0.2545, "step": 3310 }, { "epoch": 1.06, "learning_rate": 2.452688362221436e-07, "loss": 0.4893, "step": 3320 }, { "epoch": 1.06, "learning_rate": 2.450035373187124e-07, "loss": 0.4399, "step": 3330 }, { "epoch": 1.07, "learning_rate": 2.447382384152812e-07, "loss": 0.4247, "step": 3340 }, { "epoch": 1.07, "learning_rate": 2.4447293951185003e-07, "loss": 0.5176, "step": 3350 }, { "epoch": 1.07, "learning_rate": 2.442076406084188e-07, "loss": 0.5129, "step": 3360 }, { "epoch": 1.07, "learning_rate": 2.439423417049876e-07, "loss": 0.3134, "step": 3370 }, { "epoch": 1.07, "learning_rate": 2.436770428015564e-07, "loss": 0.4207, "step": 3380 }, { "epoch": 1.07, "learning_rate": 2.434117438981252e-07, "loss": 0.6662, "step": 3390 }, { "epoch": 1.07, "learning_rate": 2.43146444994694e-07, "loss": 0.4619, "step": 3400 }, { "epoch": 1.07, "learning_rate": 2.428811460912628e-07, "loss": 0.5505, "step": 3410 }, { "epoch": 1.07, "learning_rate": 2.426158471878316e-07, "loss": 0.5201, "step": 3420 }, { "epoch": 1.07, "learning_rate": 2.4235054828440044e-07, "loss": 0.3945, "step": 3430 }, { "epoch": 1.07, "learning_rate": 2.420852493809692e-07, "loss": 0.4216, "step": 3440 }, { "epoch": 1.07, "learning_rate": 2.41819950477538e-07, "loss": 0.4044, "step": 3450 }, { "epoch": 1.08, "learning_rate": 2.4155465157410683e-07, "loss": 0.4147, "step": 3460 }, { "epoch": 1.08, "learning_rate": 2.412893526706756e-07, "loss": 0.4367, "step": 3470 }, { "epoch": 1.08, "learning_rate": 2.410240537672444e-07, "loss": 0.4442, "step": 3480 }, { "epoch": 1.08, "learning_rate": 2.407587548638132e-07, "loss": 0.3516, "step": 3490 }, { "epoch": 1.08, "learning_rate": 2.4049345596038204e-07, "loss": 0.4012, "step": 3500 }, { "epoch": 1.08, "learning_rate": 2.402281570569508e-07, "loss": 0.3966, "step": 3510 }, { "epoch": 1.08, "learning_rate": 2.399628581535196e-07, "loss": 0.4944, "step": 3520 }, { "epoch": 1.08, "learning_rate": 2.3969755925008843e-07, "loss": 0.5081, "step": 3530 }, { "epoch": 1.08, "learning_rate": 2.394322603466572e-07, "loss": 0.5445, "step": 3540 }, { "epoch": 1.08, "learning_rate": 2.39166961443226e-07, "loss": 0.3361, "step": 3550 }, { "epoch": 1.08, "learning_rate": 2.389016625397948e-07, "loss": 0.3811, "step": 3560 }, { "epoch": 1.08, "learning_rate": 2.3863636363636364e-07, "loss": 0.4163, "step": 3570 }, { "epoch": 1.08, "learning_rate": 2.3837106473293243e-07, "loss": 0.3715, "step": 3580 }, { "epoch": 1.09, "learning_rate": 2.3810576582950121e-07, "loss": 0.4421, "step": 3590 }, { "epoch": 1.09, "learning_rate": 2.3784046692607003e-07, "loss": 0.5121, "step": 3600 }, { "epoch": 1.09, "learning_rate": 2.3757516802263884e-07, "loss": 0.4881, "step": 3610 }, { "epoch": 1.09, "learning_rate": 2.3730986911920763e-07, "loss": 0.4282, "step": 3620 }, { "epoch": 1.09, "learning_rate": 2.3704457021577642e-07, "loss": 0.3844, "step": 3630 }, { "epoch": 1.09, "learning_rate": 2.3677927131234523e-07, "loss": 0.3984, "step": 3640 }, { "epoch": 1.09, "learning_rate": 2.3651397240891402e-07, "loss": 0.3395, "step": 3650 }, { "epoch": 1.09, "learning_rate": 2.362486735054828e-07, "loss": 0.3538, "step": 3660 }, { "epoch": 1.09, "learning_rate": 2.3598337460205165e-07, "loss": 0.469, "step": 3670 }, { "epoch": 1.09, "learning_rate": 2.3571807569862044e-07, "loss": 0.4087, "step": 3680 }, { "epoch": 1.09, "learning_rate": 2.3545277679518923e-07, "loss": 0.5183, "step": 3690 }, { "epoch": 1.09, "learning_rate": 2.3518747789175804e-07, "loss": 0.4856, "step": 3700 }, { "epoch": 1.1, "learning_rate": 2.3492217898832683e-07, "loss": 0.3906, "step": 3710 }, { "epoch": 1.1, "learning_rate": 2.3465688008489562e-07, "loss": 0.4084, "step": 3720 }, { "epoch": 1.1, "learning_rate": 2.3439158118146444e-07, "loss": 0.4071, "step": 3730 }, { "epoch": 1.1, "learning_rate": 2.3412628227803325e-07, "loss": 0.4447, "step": 3740 }, { "epoch": 1.1, "learning_rate": 2.3386098337460204e-07, "loss": 0.4718, "step": 3750 }, { "epoch": 1.1, "learning_rate": 2.3359568447117085e-07, "loss": 0.4005, "step": 3760 }, { "epoch": 1.1, "learning_rate": 2.3333038556773964e-07, "loss": 0.4809, "step": 3770 }, { "epoch": 1.1, "learning_rate": 2.3306508666430843e-07, "loss": 0.3627, "step": 3780 }, { "epoch": 1.1, "learning_rate": 2.3279978776087724e-07, "loss": 0.4105, "step": 3790 }, { "epoch": 1.1, "learning_rate": 2.3253448885744603e-07, "loss": 0.4147, "step": 3800 }, { "epoch": 1.1, "learning_rate": 2.3226918995401482e-07, "loss": 0.5299, "step": 3810 }, { "epoch": 1.1, "learning_rate": 2.3200389105058366e-07, "loss": 0.4322, "step": 3820 }, { "epoch": 1.1, "learning_rate": 2.3173859214715245e-07, "loss": 0.3699, "step": 3830 }, { "epoch": 1.11, "learning_rate": 2.3147329324372124e-07, "loss": 0.3699, "step": 3840 }, { "epoch": 1.11, "learning_rate": 2.3120799434029005e-07, "loss": 0.2841, "step": 3850 }, { "epoch": 1.11, "learning_rate": 2.3094269543685884e-07, "loss": 0.5602, "step": 3860 }, { "epoch": 1.11, "learning_rate": 2.3067739653342763e-07, "loss": 0.3621, "step": 3870 }, { "epoch": 1.11, "learning_rate": 2.3041209762999647e-07, "loss": 0.3471, "step": 3880 }, { "epoch": 1.11, "learning_rate": 2.3014679872656526e-07, "loss": 0.3144, "step": 3890 }, { "epoch": 1.11, "learning_rate": 2.2988149982313405e-07, "loss": 0.3478, "step": 3900 }, { "epoch": 1.11, "learning_rate": 2.2961620091970286e-07, "loss": 0.3689, "step": 3910 }, { "epoch": 1.11, "learning_rate": 2.2935090201627165e-07, "loss": 0.4915, "step": 3920 }, { "epoch": 1.11, "learning_rate": 2.2908560311284044e-07, "loss": 0.5927, "step": 3930 }, { "epoch": 1.11, "learning_rate": 2.2882030420940925e-07, "loss": 0.3461, "step": 3940 }, { "epoch": 1.11, "learning_rate": 2.2855500530597804e-07, "loss": 0.4687, "step": 3950 }, { "epoch": 1.12, "learning_rate": 2.2828970640254686e-07, "loss": 0.3873, "step": 3960 }, { "epoch": 1.12, "learning_rate": 2.2802440749911567e-07, "loss": 0.4167, "step": 3970 }, { "epoch": 1.12, "learning_rate": 2.2775910859568446e-07, "loss": 0.369, "step": 3980 }, { "epoch": 1.12, "learning_rate": 2.2749380969225325e-07, "loss": 0.3097, "step": 3990 }, { "epoch": 1.12, "learning_rate": 2.2722851078882206e-07, "loss": 0.4325, "step": 4000 }, { "epoch": 1.12, "learning_rate": 2.2696321188539085e-07, "loss": 0.3702, "step": 4010 }, { "epoch": 1.12, "learning_rate": 2.2669791298195964e-07, "loss": 0.5312, "step": 4020 }, { "epoch": 1.12, "learning_rate": 2.2643261407852848e-07, "loss": 0.2592, "step": 4030 }, { "epoch": 1.12, "learning_rate": 2.2616731517509727e-07, "loss": 0.5263, "step": 4040 }, { "epoch": 1.12, "learning_rate": 2.2590201627166606e-07, "loss": 0.3491, "step": 4050 }, { "epoch": 1.12, "learning_rate": 2.2563671736823487e-07, "loss": 0.4487, "step": 4060 }, { "epoch": 1.12, "learning_rate": 2.2537141846480366e-07, "loss": 0.4003, "step": 4070 }, { "epoch": 1.12, "learning_rate": 2.2510611956137245e-07, "loss": 0.554, "step": 4080 }, { "epoch": 1.13, "learning_rate": 2.248408206579413e-07, "loss": 0.3426, "step": 4090 }, { "epoch": 1.13, "learning_rate": 2.2457552175451008e-07, "loss": 0.3495, "step": 4100 }, { "epoch": 1.13, "learning_rate": 2.2431022285107887e-07, "loss": 0.3407, "step": 4110 }, { "epoch": 1.13, "learning_rate": 2.2404492394764768e-07, "loss": 0.4444, "step": 4120 }, { "epoch": 1.13, "learning_rate": 2.2377962504421647e-07, "loss": 0.3747, "step": 4130 }, { "epoch": 1.13, "learning_rate": 2.2351432614078526e-07, "loss": 0.4015, "step": 4140 }, { "epoch": 1.13, "learning_rate": 2.2324902723735407e-07, "loss": 0.34, "step": 4150 }, { "epoch": 1.13, "learning_rate": 2.2298372833392286e-07, "loss": 0.4813, "step": 4160 }, { "epoch": 1.13, "learning_rate": 2.2271842943049165e-07, "loss": 0.4426, "step": 4170 }, { "epoch": 1.13, "learning_rate": 2.224531305270605e-07, "loss": 0.314, "step": 4180 }, { "epoch": 1.13, "learning_rate": 2.2218783162362928e-07, "loss": 0.4372, "step": 4190 }, { "epoch": 1.13, "learning_rate": 2.2192253272019807e-07, "loss": 0.5747, "step": 4200 }, { "epoch": 1.13, "learning_rate": 2.2165723381676688e-07, "loss": 0.3215, "step": 4210 }, { "epoch": 1.14, "learning_rate": 2.2139193491333567e-07, "loss": 0.4074, "step": 4220 }, { "epoch": 1.14, "learning_rate": 2.2112663600990446e-07, "loss": 0.3729, "step": 4230 }, { "epoch": 1.14, "learning_rate": 2.208613371064733e-07, "loss": 0.4418, "step": 4240 }, { "epoch": 1.14, "learning_rate": 2.205960382030421e-07, "loss": 0.4403, "step": 4250 }, { "epoch": 1.14, "learning_rate": 2.203307392996109e-07, "loss": 0.4159, "step": 4260 }, { "epoch": 1.14, "learning_rate": 2.200654403961797e-07, "loss": 0.4298, "step": 4270 }, { "epoch": 1.14, "learning_rate": 2.1980014149274848e-07, "loss": 0.4128, "step": 4280 }, { "epoch": 1.14, "learning_rate": 2.195348425893173e-07, "loss": 0.3948, "step": 4290 }, { "epoch": 1.14, "learning_rate": 2.1926954368588608e-07, "loss": 0.3271, "step": 4300 }, { "epoch": 1.14, "learning_rate": 2.190042447824549e-07, "loss": 0.4198, "step": 4310 }, { "epoch": 1.14, "learning_rate": 2.187389458790237e-07, "loss": 0.4266, "step": 4320 }, { "epoch": 1.14, "learning_rate": 2.184736469755925e-07, "loss": 0.4236, "step": 4330 }, { "epoch": 1.15, "learning_rate": 2.182083480721613e-07, "loss": 0.4734, "step": 4340 }, { "epoch": 1.15, "learning_rate": 2.179430491687301e-07, "loss": 0.4639, "step": 4350 }, { "epoch": 1.15, "learning_rate": 2.176777502652989e-07, "loss": 0.3681, "step": 4360 }, { "epoch": 1.15, "learning_rate": 2.1741245136186768e-07, "loss": 0.2595, "step": 4370 }, { "epoch": 1.15, "learning_rate": 2.1714715245843652e-07, "loss": 0.4727, "step": 4380 }, { "epoch": 1.15, "learning_rate": 2.168818535550053e-07, "loss": 0.3284, "step": 4390 }, { "epoch": 1.15, "learning_rate": 2.166165546515741e-07, "loss": 0.3289, "step": 4400 }, { "epoch": 1.15, "learning_rate": 2.163512557481429e-07, "loss": 0.4443, "step": 4410 }, { "epoch": 1.15, "learning_rate": 2.160859568447117e-07, "loss": 0.2787, "step": 4420 }, { "epoch": 1.15, "learning_rate": 2.158206579412805e-07, "loss": 0.6671, "step": 4430 }, { "epoch": 1.15, "learning_rate": 2.1555535903784933e-07, "loss": 0.3741, "step": 4440 }, { "epoch": 1.15, "learning_rate": 2.1529006013441812e-07, "loss": 0.3775, "step": 4450 }, { "epoch": 1.15, "learning_rate": 2.150247612309869e-07, "loss": 0.3939, "step": 4460 }, { "epoch": 1.16, "learning_rate": 2.1475946232755572e-07, "loss": 0.2898, "step": 4470 }, { "epoch": 1.16, "learning_rate": 2.144941634241245e-07, "loss": 0.535, "step": 4480 }, { "epoch": 1.16, "learning_rate": 2.142288645206933e-07, "loss": 0.3356, "step": 4490 }, { "epoch": 1.16, "learning_rate": 2.139635656172621e-07, "loss": 0.4034, "step": 4500 }, { "epoch": 1.16, "learning_rate": 2.136982667138309e-07, "loss": 0.3448, "step": 4510 }, { "epoch": 1.16, "learning_rate": 2.134329678103997e-07, "loss": 0.4358, "step": 4520 }, { "epoch": 1.16, "learning_rate": 2.1316766890696853e-07, "loss": 0.2735, "step": 4530 }, { "epoch": 1.16, "learning_rate": 2.1290237000353732e-07, "loss": 0.4325, "step": 4540 }, { "epoch": 1.16, "learning_rate": 2.126370711001061e-07, "loss": 0.3883, "step": 4550 }, { "epoch": 1.16, "learning_rate": 2.1237177219667492e-07, "loss": 0.3338, "step": 4560 }, { "epoch": 1.16, "learning_rate": 2.121064732932437e-07, "loss": 0.3989, "step": 4570 }, { "epoch": 1.16, "learning_rate": 2.118411743898125e-07, "loss": 0.3722, "step": 4580 }, { "epoch": 1.17, "learning_rate": 2.1157587548638134e-07, "loss": 0.3492, "step": 4590 }, { "epoch": 1.17, "learning_rate": 2.1131057658295013e-07, "loss": 0.3775, "step": 4600 }, { "epoch": 1.17, "learning_rate": 2.1104527767951891e-07, "loss": 0.3562, "step": 4610 }, { "epoch": 1.17, "learning_rate": 2.1077997877608773e-07, "loss": 0.6652, "step": 4620 }, { "epoch": 1.17, "learning_rate": 2.1051467987265652e-07, "loss": 0.3927, "step": 4630 }, { "epoch": 1.17, "learning_rate": 2.102493809692253e-07, "loss": 0.3628, "step": 4640 }, { "epoch": 1.17, "learning_rate": 2.0998408206579412e-07, "loss": 0.5185, "step": 4650 }, { "epoch": 1.17, "learning_rate": 2.0971878316236294e-07, "loss": 0.3643, "step": 4660 }, { "epoch": 1.17, "learning_rate": 2.0945348425893172e-07, "loss": 0.4946, "step": 4670 }, { "epoch": 1.17, "learning_rate": 2.0918818535550054e-07, "loss": 0.4316, "step": 4680 }, { "epoch": 1.17, "learning_rate": 2.0892288645206933e-07, "loss": 0.3947, "step": 4690 }, { "epoch": 1.17, "learning_rate": 2.0865758754863811e-07, "loss": 0.3337, "step": 4700 }, { "epoch": 1.17, "learning_rate": 2.0839228864520693e-07, "loss": 0.3022, "step": 4710 }, { "epoch": 1.18, "learning_rate": 2.0812698974177572e-07, "loss": 0.3628, "step": 4720 }, { "epoch": 1.18, "learning_rate": 2.078616908383445e-07, "loss": 0.3703, "step": 4730 }, { "epoch": 1.18, "learning_rate": 2.0759639193491335e-07, "loss": 0.275, "step": 4740 }, { "epoch": 1.18, "learning_rate": 2.0733109303148214e-07, "loss": 0.3861, "step": 4750 }, { "epoch": 1.18, "learning_rate": 2.0706579412805092e-07, "loss": 0.416, "step": 4760 }, { "epoch": 1.18, "learning_rate": 2.0680049522461974e-07, "loss": 0.3163, "step": 4770 }, { "epoch": 1.18, "learning_rate": 2.0653519632118853e-07, "loss": 0.371, "step": 4780 }, { "epoch": 1.18, "learning_rate": 2.0626989741775732e-07, "loss": 0.2409, "step": 4790 }, { "epoch": 1.18, "learning_rate": 2.0600459851432616e-07, "loss": 0.5185, "step": 4800 }, { "epoch": 1.18, "learning_rate": 2.0573929961089494e-07, "loss": 0.3575, "step": 4810 }, { "epoch": 1.18, "learning_rate": 2.0547400070746373e-07, "loss": 0.5805, "step": 4820 }, { "epoch": 1.18, "learning_rate": 2.0520870180403255e-07, "loss": 0.3353, "step": 4830 }, { "epoch": 1.19, "learning_rate": 2.0494340290060134e-07, "loss": 0.3381, "step": 4840 }, { "epoch": 1.19, "learning_rate": 2.0467810399717012e-07, "loss": 0.3048, "step": 4850 }, { "epoch": 1.19, "learning_rate": 2.0441280509373894e-07, "loss": 0.3347, "step": 4860 }, { "epoch": 1.19, "learning_rate": 2.0414750619030773e-07, "loss": 0.3736, "step": 4870 }, { "epoch": 1.19, "learning_rate": 2.0388220728687654e-07, "loss": 0.3378, "step": 4880 }, { "epoch": 1.19, "learning_rate": 2.0361690838344536e-07, "loss": 0.3411, "step": 4890 }, { "epoch": 1.19, "learning_rate": 2.0335160948001414e-07, "loss": 0.2934, "step": 4900 }, { "epoch": 1.19, "learning_rate": 2.0308631057658293e-07, "loss": 0.4118, "step": 4910 }, { "epoch": 1.19, "learning_rate": 2.0282101167315175e-07, "loss": 0.3951, "step": 4920 }, { "epoch": 1.19, "learning_rate": 2.0255571276972054e-07, "loss": 0.322, "step": 4930 }, { "epoch": 1.19, "learning_rate": 2.0229041386628932e-07, "loss": 0.3692, "step": 4940 }, { "epoch": 1.19, "learning_rate": 2.0202511496285817e-07, "loss": 0.3782, "step": 4950 }, { "epoch": 1.19, "learning_rate": 2.0175981605942695e-07, "loss": 0.4777, "step": 4960 }, { "epoch": 1.2, "learning_rate": 2.0149451715599574e-07, "loss": 0.421, "step": 4970 }, { "epoch": 1.2, "learning_rate": 2.0122921825256456e-07, "loss": 0.3586, "step": 4980 }, { "epoch": 1.2, "learning_rate": 2.0096391934913335e-07, "loss": 0.3617, "step": 4990 }, { "epoch": 1.2, "learning_rate": 2.0069862044570213e-07, "loss": 0.2693, "step": 5000 }, { "epoch": 1.2, "learning_rate": 2.0043332154227095e-07, "loss": 0.2447, "step": 5010 }, { "epoch": 1.2, "learning_rate": 2.0016802263883976e-07, "loss": 0.3542, "step": 5020 }, { "epoch": 1.2, "eval_accuracy": 0.7593100744805958, "eval_loss": 0.44453132152557373, "eval_runtime": 710.6654, "eval_samples_per_second": 3.59, "eval_steps_per_second": 0.898, "step": 5028 }, { "epoch": 2.0, "learning_rate": 1.9990272373540855e-07, "loss": 0.3873, "step": 5030 }, { "epoch": 2.0, "learning_rate": 1.9963742483197737e-07, "loss": 0.3811, "step": 5040 }, { "epoch": 2.0, "learning_rate": 1.9937212592854615e-07, "loss": 0.2772, "step": 5050 }, { "epoch": 2.0, "learning_rate": 1.9910682702511494e-07, "loss": 0.4806, "step": 5060 }, { "epoch": 2.0, "learning_rate": 1.9884152812168376e-07, "loss": 0.3124, "step": 5070 }, { "epoch": 2.0, "learning_rate": 1.9857622921825255e-07, "loss": 0.247, "step": 5080 }, { "epoch": 2.0, "learning_rate": 1.9831093031482133e-07, "loss": 0.3793, "step": 5090 }, { "epoch": 2.01, "learning_rate": 1.9804563141139017e-07, "loss": 0.5119, "step": 5100 }, { "epoch": 2.01, "learning_rate": 1.9778033250795896e-07, "loss": 0.2919, "step": 5110 }, { "epoch": 2.01, "learning_rate": 1.9751503360452775e-07, "loss": 0.3725, "step": 5120 }, { "epoch": 2.01, "learning_rate": 1.9724973470109657e-07, "loss": 0.4659, "step": 5130 }, { "epoch": 2.01, "learning_rate": 1.9698443579766535e-07, "loss": 0.3603, "step": 5140 }, { "epoch": 2.01, "learning_rate": 1.9671913689423414e-07, "loss": 0.3629, "step": 5150 }, { "epoch": 2.01, "learning_rate": 1.9645383799080298e-07, "loss": 0.4292, "step": 5160 }, { "epoch": 2.01, "learning_rate": 1.9618853908737177e-07, "loss": 0.3256, "step": 5170 }, { "epoch": 2.01, "learning_rate": 1.9592324018394056e-07, "loss": 0.2833, "step": 5180 }, { "epoch": 2.01, "learning_rate": 1.9565794128050938e-07, "loss": 0.47, "step": 5190 }, { "epoch": 2.01, "learning_rate": 1.9539264237707816e-07, "loss": 0.2898, "step": 5200 }, { "epoch": 2.01, "learning_rate": 1.9512734347364695e-07, "loss": 0.2773, "step": 5210 }, { "epoch": 2.02, "learning_rate": 1.9486204457021577e-07, "loss": 0.4663, "step": 5220 }, { "epoch": 2.02, "learning_rate": 1.9459674566678455e-07, "loss": 0.4463, "step": 5230 }, { "epoch": 2.02, "learning_rate": 1.9433144676335337e-07, "loss": 0.306, "step": 5240 }, { "epoch": 2.02, "learning_rate": 1.9406614785992218e-07, "loss": 0.5645, "step": 5250 }, { "epoch": 2.02, "learning_rate": 1.9380084895649097e-07, "loss": 0.3439, "step": 5260 }, { "epoch": 2.02, "learning_rate": 1.9353555005305976e-07, "loss": 0.311, "step": 5270 }, { "epoch": 2.02, "learning_rate": 1.9327025114962858e-07, "loss": 0.4669, "step": 5280 }, { "epoch": 2.02, "learning_rate": 1.9300495224619736e-07, "loss": 0.2812, "step": 5290 }, { "epoch": 2.02, "learning_rate": 1.9273965334276615e-07, "loss": 0.3005, "step": 5300 }, { "epoch": 2.02, "learning_rate": 1.92474354439335e-07, "loss": 0.3576, "step": 5310 }, { "epoch": 2.02, "learning_rate": 1.9220905553590378e-07, "loss": 0.2867, "step": 5320 }, { "epoch": 2.02, "learning_rate": 1.9194375663247257e-07, "loss": 0.2798, "step": 5330 }, { "epoch": 2.02, "learning_rate": 1.9167845772904138e-07, "loss": 0.3237, "step": 5340 }, { "epoch": 2.03, "learning_rate": 1.9141315882561017e-07, "loss": 0.3129, "step": 5350 }, { "epoch": 2.03, "learning_rate": 1.9114785992217896e-07, "loss": 0.3229, "step": 5360 }, { "epoch": 2.03, "learning_rate": 1.908825610187478e-07, "loss": 0.4269, "step": 5370 }, { "epoch": 2.03, "learning_rate": 1.906172621153166e-07, "loss": 0.4231, "step": 5380 }, { "epoch": 2.03, "learning_rate": 1.9035196321188538e-07, "loss": 0.3866, "step": 5390 }, { "epoch": 2.03, "learning_rate": 1.900866643084542e-07, "loss": 0.2466, "step": 5400 }, { "epoch": 2.03, "learning_rate": 1.8982136540502298e-07, "loss": 0.4208, "step": 5410 }, { "epoch": 2.03, "learning_rate": 1.8955606650159177e-07, "loss": 0.3165, "step": 5420 }, { "epoch": 2.03, "learning_rate": 1.8929076759816058e-07, "loss": 0.3127, "step": 5430 }, { "epoch": 2.03, "learning_rate": 1.8902546869472937e-07, "loss": 0.3945, "step": 5440 }, { "epoch": 2.03, "learning_rate": 1.8876016979129816e-07, "loss": 0.381, "step": 5450 }, { "epoch": 2.03, "learning_rate": 1.88494870887867e-07, "loss": 0.4172, "step": 5460 }, { "epoch": 2.04, "learning_rate": 1.882295719844358e-07, "loss": 0.461, "step": 5470 }, { "epoch": 2.04, "learning_rate": 1.8796427308100458e-07, "loss": 0.46, "step": 5480 }, { "epoch": 2.04, "learning_rate": 1.876989741775734e-07, "loss": 0.2381, "step": 5490 }, { "epoch": 2.04, "learning_rate": 1.8743367527414218e-07, "loss": 0.4672, "step": 5500 }, { "epoch": 2.04, "learning_rate": 1.8716837637071097e-07, "loss": 0.2909, "step": 5510 }, { "epoch": 2.04, "learning_rate": 1.869030774672798e-07, "loss": 0.4985, "step": 5520 }, { "epoch": 2.04, "learning_rate": 1.866377785638486e-07, "loss": 0.3446, "step": 5530 }, { "epoch": 2.04, "learning_rate": 1.863724796604174e-07, "loss": 0.3878, "step": 5540 }, { "epoch": 2.04, "learning_rate": 1.861071807569862e-07, "loss": 0.2926, "step": 5550 }, { "epoch": 2.04, "learning_rate": 1.85841881853555e-07, "loss": 0.2187, "step": 5560 }, { "epoch": 2.04, "learning_rate": 1.8557658295012378e-07, "loss": 0.338, "step": 5570 }, { "epoch": 2.04, "learning_rate": 1.853112840466926e-07, "loss": 0.2448, "step": 5580 }, { "epoch": 2.04, "learning_rate": 1.850459851432614e-07, "loss": 0.2129, "step": 5590 }, { "epoch": 2.05, "learning_rate": 1.847806862398302e-07, "loss": 0.3554, "step": 5600 }, { "epoch": 2.05, "learning_rate": 1.84515387336399e-07, "loss": 0.444, "step": 5610 }, { "epoch": 2.05, "learning_rate": 1.842500884329678e-07, "loss": 0.306, "step": 5620 }, { "epoch": 2.05, "learning_rate": 1.839847895295366e-07, "loss": 0.4596, "step": 5630 }, { "epoch": 2.05, "learning_rate": 1.837194906261054e-07, "loss": 0.4518, "step": 5640 }, { "epoch": 2.05, "learning_rate": 1.834541917226742e-07, "loss": 0.2654, "step": 5650 }, { "epoch": 2.05, "learning_rate": 1.8318889281924298e-07, "loss": 0.2054, "step": 5660 }, { "epoch": 2.05, "learning_rate": 1.8292359391581182e-07, "loss": 0.2483, "step": 5670 }, { "epoch": 2.05, "learning_rate": 1.826582950123806e-07, "loss": 0.3451, "step": 5680 }, { "epoch": 2.05, "learning_rate": 1.823929961089494e-07, "loss": 0.3117, "step": 5690 }, { "epoch": 2.05, "learning_rate": 1.821276972055182e-07, "loss": 0.2829, "step": 5700 }, { "epoch": 2.05, "learning_rate": 1.81862398302087e-07, "loss": 0.3713, "step": 5710 }, { "epoch": 2.06, "learning_rate": 1.815970993986558e-07, "loss": 0.2603, "step": 5720 }, { "epoch": 2.06, "learning_rate": 1.8133180049522463e-07, "loss": 0.2378, "step": 5730 }, { "epoch": 2.06, "learning_rate": 1.8106650159179342e-07, "loss": 0.4374, "step": 5740 }, { "epoch": 2.06, "learning_rate": 1.808012026883622e-07, "loss": 0.4291, "step": 5750 }, { "epoch": 2.06, "learning_rate": 1.8053590378493102e-07, "loss": 0.4139, "step": 5760 }, { "epoch": 2.06, "learning_rate": 1.802706048814998e-07, "loss": 0.385, "step": 5770 }, { "epoch": 2.06, "learning_rate": 1.800053059780686e-07, "loss": 0.3487, "step": 5780 }, { "epoch": 2.06, "learning_rate": 1.797400070746374e-07, "loss": 0.316, "step": 5790 }, { "epoch": 2.06, "learning_rate": 1.794747081712062e-07, "loss": 0.3861, "step": 5800 }, { "epoch": 2.06, "learning_rate": 1.7920940926777502e-07, "loss": 0.3524, "step": 5810 }, { "epoch": 2.06, "learning_rate": 1.7894411036434383e-07, "loss": 0.2247, "step": 5820 }, { "epoch": 2.06, "learning_rate": 1.7867881146091262e-07, "loss": 0.3113, "step": 5830 }, { "epoch": 2.06, "learning_rate": 1.784135125574814e-07, "loss": 0.5685, "step": 5840 }, { "epoch": 2.07, "learning_rate": 1.7814821365405022e-07, "loss": 0.4044, "step": 5850 }, { "epoch": 2.07, "learning_rate": 1.77882914750619e-07, "loss": 0.3159, "step": 5860 }, { "epoch": 2.07, "learning_rate": 1.776176158471878e-07, "loss": 0.372, "step": 5870 }, { "epoch": 2.07, "learning_rate": 1.7735231694375664e-07, "loss": 0.2586, "step": 5880 }, { "epoch": 2.07, "learning_rate": 1.7708701804032543e-07, "loss": 0.3496, "step": 5890 }, { "epoch": 2.07, "learning_rate": 1.7682171913689422e-07, "loss": 0.3252, "step": 5900 }, { "epoch": 2.07, "learning_rate": 1.7655642023346303e-07, "loss": 0.2223, "step": 5910 }, { "epoch": 2.07, "learning_rate": 1.7629112133003182e-07, "loss": 0.3757, "step": 5920 }, { "epoch": 2.07, "learning_rate": 1.7602582242660063e-07, "loss": 0.3672, "step": 5930 }, { "epoch": 2.07, "learning_rate": 1.7576052352316945e-07, "loss": 0.2711, "step": 5940 }, { "epoch": 2.07, "learning_rate": 1.7549522461973824e-07, "loss": 0.3355, "step": 5950 }, { "epoch": 2.07, "learning_rate": 1.7522992571630705e-07, "loss": 0.3543, "step": 5960 }, { "epoch": 2.07, "learning_rate": 1.7496462681287584e-07, "loss": 0.3793, "step": 5970 }, { "epoch": 2.08, "learning_rate": 1.7469932790944463e-07, "loss": 0.5126, "step": 5980 }, { "epoch": 2.08, "learning_rate": 1.7443402900601344e-07, "loss": 0.2448, "step": 5990 }, { "epoch": 2.08, "learning_rate": 1.7416873010258223e-07, "loss": 0.2939, "step": 6000 }, { "epoch": 2.08, "learning_rate": 1.7390343119915102e-07, "loss": 0.246, "step": 6010 }, { "epoch": 2.08, "learning_rate": 1.7363813229571986e-07, "loss": 0.4639, "step": 6020 }, { "epoch": 2.08, "learning_rate": 1.7337283339228865e-07, "loss": 0.3554, "step": 6030 }, { "epoch": 2.08, "learning_rate": 1.7310753448885744e-07, "loss": 0.5248, "step": 6040 }, { "epoch": 2.08, "learning_rate": 1.7284223558542625e-07, "loss": 0.2787, "step": 6050 }, { "epoch": 2.08, "learning_rate": 1.7257693668199504e-07, "loss": 0.3388, "step": 6060 }, { "epoch": 2.08, "learning_rate": 1.7231163777856383e-07, "loss": 0.306, "step": 6070 }, { "epoch": 2.08, "learning_rate": 1.7204633887513267e-07, "loss": 0.3312, "step": 6080 }, { "epoch": 2.08, "learning_rate": 1.7178103997170146e-07, "loss": 0.3058, "step": 6090 }, { "epoch": 2.09, "learning_rate": 1.7151574106827025e-07, "loss": 0.6773, "step": 6100 }, { "epoch": 2.09, "learning_rate": 1.7125044216483906e-07, "loss": 0.2879, "step": 6110 }, { "epoch": 2.09, "learning_rate": 1.7098514326140785e-07, "loss": 0.3534, "step": 6120 }, { "epoch": 2.09, "learning_rate": 1.7071984435797664e-07, "loss": 0.3959, "step": 6130 }, { "epoch": 2.09, "learning_rate": 1.7045454545454545e-07, "loss": 0.4433, "step": 6140 }, { "epoch": 2.09, "learning_rate": 1.7018924655111424e-07, "loss": 0.3306, "step": 6150 }, { "epoch": 2.09, "learning_rate": 1.6992394764768303e-07, "loss": 0.2876, "step": 6160 }, { "epoch": 2.09, "learning_rate": 1.6965864874425187e-07, "loss": 0.2279, "step": 6170 }, { "epoch": 2.09, "learning_rate": 1.6939334984082066e-07, "loss": 0.47, "step": 6180 }, { "epoch": 2.09, "learning_rate": 1.6912805093738945e-07, "loss": 0.5794, "step": 6190 }, { "epoch": 2.09, "learning_rate": 1.6886275203395826e-07, "loss": 0.3706, "step": 6200 }, { "epoch": 2.09, "learning_rate": 1.6859745313052705e-07, "loss": 0.3674, "step": 6210 }, { "epoch": 2.09, "learning_rate": 1.6833215422709584e-07, "loss": 0.4053, "step": 6220 }, { "epoch": 2.1, "learning_rate": 1.6806685532366468e-07, "loss": 0.3724, "step": 6230 }, { "epoch": 2.1, "learning_rate": 1.6780155642023347e-07, "loss": 0.4831, "step": 6240 }, { "epoch": 2.1, "learning_rate": 1.6753625751680225e-07, "loss": 0.1894, "step": 6250 }, { "epoch": 2.1, "learning_rate": 1.6727095861337107e-07, "loss": 0.4633, "step": 6260 }, { "epoch": 2.1, "learning_rate": 1.6700565970993986e-07, "loss": 0.2882, "step": 6270 }, { "epoch": 2.1, "learning_rate": 1.6674036080650865e-07, "loss": 0.4873, "step": 6280 }, { "epoch": 2.1, "learning_rate": 1.6647506190307746e-07, "loss": 0.4315, "step": 6290 }, { "epoch": 2.1, "learning_rate": 1.6620976299964628e-07, "loss": 0.4249, "step": 6300 }, { "epoch": 2.1, "learning_rate": 1.6594446409621506e-07, "loss": 0.3353, "step": 6310 }, { "epoch": 2.1, "learning_rate": 1.6567916519278388e-07, "loss": 0.7332, "step": 6320 }, { "epoch": 2.1, "learning_rate": 1.6541386628935267e-07, "loss": 0.1961, "step": 6330 }, { "epoch": 2.1, "learning_rate": 1.6514856738592146e-07, "loss": 0.3474, "step": 6340 }, { "epoch": 2.11, "learning_rate": 1.6488326848249027e-07, "loss": 0.2356, "step": 6350 }, { "epoch": 2.11, "learning_rate": 1.6461796957905906e-07, "loss": 0.3085, "step": 6360 }, { "epoch": 2.11, "learning_rate": 1.6435267067562785e-07, "loss": 0.3192, "step": 6370 }, { "epoch": 2.11, "learning_rate": 1.640873717721967e-07, "loss": 0.2222, "step": 6380 }, { "epoch": 2.11, "learning_rate": 1.6382207286876548e-07, "loss": 0.4065, "step": 6390 }, { "epoch": 2.11, "learning_rate": 1.6355677396533426e-07, "loss": 0.4045, "step": 6400 }, { "epoch": 2.11, "learning_rate": 1.6329147506190308e-07, "loss": 0.3183, "step": 6410 }, { "epoch": 2.11, "learning_rate": 1.6302617615847187e-07, "loss": 0.1826, "step": 6420 }, { "epoch": 2.11, "learning_rate": 1.6276087725504066e-07, "loss": 0.3059, "step": 6430 }, { "epoch": 2.11, "learning_rate": 1.624955783516095e-07, "loss": 0.2374, "step": 6440 }, { "epoch": 2.11, "learning_rate": 1.6223027944817828e-07, "loss": 0.3193, "step": 6450 }, { "epoch": 2.11, "learning_rate": 1.6196498054474707e-07, "loss": 0.2761, "step": 6460 }, { "epoch": 2.11, "learning_rate": 1.616996816413159e-07, "loss": 0.3633, "step": 6470 }, { "epoch": 2.12, "learning_rate": 1.6143438273788468e-07, "loss": 0.3956, "step": 6480 }, { "epoch": 2.12, "learning_rate": 1.6116908383445346e-07, "loss": 0.2156, "step": 6490 }, { "epoch": 2.12, "learning_rate": 1.6090378493102228e-07, "loss": 0.3898, "step": 6500 }, { "epoch": 2.12, "learning_rate": 1.6063848602759107e-07, "loss": 0.2793, "step": 6510 }, { "epoch": 2.12, "learning_rate": 1.6037318712415988e-07, "loss": 0.3232, "step": 6520 }, { "epoch": 2.12, "learning_rate": 1.601078882207287e-07, "loss": 0.2486, "step": 6530 }, { "epoch": 2.12, "learning_rate": 1.5984258931729749e-07, "loss": 0.447, "step": 6540 }, { "epoch": 2.12, "learning_rate": 1.5957729041386627e-07, "loss": 0.368, "step": 6550 }, { "epoch": 2.12, "learning_rate": 1.593119915104351e-07, "loss": 0.3826, "step": 6560 }, { "epoch": 2.12, "learning_rate": 1.5904669260700388e-07, "loss": 0.2456, "step": 6570 }, { "epoch": 2.12, "learning_rate": 1.5878139370357266e-07, "loss": 0.3172, "step": 6580 }, { "epoch": 2.12, "learning_rate": 1.585160948001415e-07, "loss": 0.2154, "step": 6590 }, { "epoch": 2.13, "learning_rate": 1.582507958967103e-07, "loss": 0.3329, "step": 6600 }, { "epoch": 2.13, "learning_rate": 1.5798549699327908e-07, "loss": 0.266, "step": 6610 }, { "epoch": 2.13, "learning_rate": 1.577201980898479e-07, "loss": 0.3691, "step": 6620 }, { "epoch": 2.13, "learning_rate": 1.5745489918641669e-07, "loss": 0.2765, "step": 6630 }, { "epoch": 2.13, "learning_rate": 1.5718960028298547e-07, "loss": 0.2228, "step": 6640 }, { "epoch": 2.13, "learning_rate": 1.5692430137955432e-07, "loss": 0.1927, "step": 6650 }, { "epoch": 2.13, "learning_rate": 1.566590024761231e-07, "loss": 0.4745, "step": 6660 }, { "epoch": 2.13, "learning_rate": 1.563937035726919e-07, "loss": 0.1534, "step": 6670 }, { "epoch": 2.13, "learning_rate": 1.561284046692607e-07, "loss": 0.1242, "step": 6680 }, { "epoch": 2.13, "learning_rate": 1.558631057658295e-07, "loss": 0.4647, "step": 6690 }, { "epoch": 2.13, "learning_rate": 1.5559780686239828e-07, "loss": 0.5586, "step": 6700 }, { "epoch": 2.13, "learning_rate": 1.553325079589671e-07, "loss": 0.4222, "step": 6710 }, { "epoch": 2.13, "learning_rate": 1.5506720905553589e-07, "loss": 0.206, "step": 6720 }, { "epoch": 2.14, "learning_rate": 1.5480191015210467e-07, "loss": 0.3112, "step": 6730 }, { "epoch": 2.14, "learning_rate": 1.5453661124867352e-07, "loss": 0.4122, "step": 6740 }, { "epoch": 2.14, "learning_rate": 1.542713123452423e-07, "loss": 0.3272, "step": 6750 }, { "epoch": 2.14, "learning_rate": 1.540060134418111e-07, "loss": 0.3635, "step": 6760 }, { "epoch": 2.14, "learning_rate": 1.537407145383799e-07, "loss": 0.5525, "step": 6770 }, { "epoch": 2.14, "learning_rate": 1.534754156349487e-07, "loss": 0.2226, "step": 6780 }, { "epoch": 2.14, "learning_rate": 1.5321011673151748e-07, "loss": 0.1621, "step": 6790 }, { "epoch": 2.14, "learning_rate": 1.5294481782808632e-07, "loss": 0.5783, "step": 6800 }, { "epoch": 2.14, "learning_rate": 1.526795189246551e-07, "loss": 0.3406, "step": 6810 }, { "epoch": 2.14, "learning_rate": 1.524142200212239e-07, "loss": 0.2203, "step": 6820 }, { "epoch": 2.14, "learning_rate": 1.5214892111779272e-07, "loss": 0.4043, "step": 6830 }, { "epoch": 2.14, "learning_rate": 1.518836222143615e-07, "loss": 0.3353, "step": 6840 }, { "epoch": 2.15, "learning_rate": 1.516183233109303e-07, "loss": 0.1902, "step": 6850 }, { "epoch": 2.15, "learning_rate": 1.513530244074991e-07, "loss": 0.3804, "step": 6860 }, { "epoch": 2.15, "learning_rate": 1.5108772550406792e-07, "loss": 0.3891, "step": 6870 }, { "epoch": 2.15, "learning_rate": 1.508224266006367e-07, "loss": 0.2982, "step": 6880 }, { "epoch": 2.15, "learning_rate": 1.5055712769720552e-07, "loss": 0.319, "step": 6890 }, { "epoch": 2.15, "learning_rate": 1.502918287937743e-07, "loss": 0.3148, "step": 6900 }, { "epoch": 2.15, "learning_rate": 1.500265298903431e-07, "loss": 0.3978, "step": 6910 }, { "epoch": 2.15, "learning_rate": 1.4976123098691192e-07, "loss": 0.2571, "step": 6920 }, { "epoch": 2.15, "learning_rate": 1.494959320834807e-07, "loss": 0.4223, "step": 6930 }, { "epoch": 2.15, "learning_rate": 1.4923063318004952e-07, "loss": 0.4157, "step": 6940 }, { "epoch": 2.15, "learning_rate": 1.489653342766183e-07, "loss": 0.4624, "step": 6950 }, { "epoch": 2.15, "learning_rate": 1.4870003537318712e-07, "loss": 0.2294, "step": 6960 }, { "epoch": 2.15, "learning_rate": 1.484347364697559e-07, "loss": 0.2723, "step": 6970 }, { "epoch": 2.16, "learning_rate": 1.4816943756632472e-07, "loss": 0.3129, "step": 6980 }, { "epoch": 2.16, "learning_rate": 1.479041386628935e-07, "loss": 0.2511, "step": 6990 }, { "epoch": 2.16, "learning_rate": 1.4763883975946233e-07, "loss": 0.4619, "step": 7000 }, { "epoch": 2.16, "learning_rate": 1.4737354085603112e-07, "loss": 0.3391, "step": 7010 }, { "epoch": 2.16, "learning_rate": 1.4710824195259993e-07, "loss": 0.3051, "step": 7020 }, { "epoch": 2.16, "learning_rate": 1.4684294304916872e-07, "loss": 0.2132, "step": 7030 }, { "epoch": 2.16, "learning_rate": 1.465776441457375e-07, "loss": 0.3529, "step": 7040 }, { "epoch": 2.16, "learning_rate": 1.4631234524230632e-07, "loss": 0.411, "step": 7050 }, { "epoch": 2.16, "learning_rate": 1.4604704633887514e-07, "loss": 0.5373, "step": 7060 }, { "epoch": 2.16, "learning_rate": 1.4578174743544393e-07, "loss": 0.2255, "step": 7070 }, { "epoch": 2.16, "learning_rate": 1.4551644853201271e-07, "loss": 0.3849, "step": 7080 }, { "epoch": 2.16, "learning_rate": 1.4525114962858153e-07, "loss": 0.3697, "step": 7090 }, { "epoch": 2.16, "learning_rate": 1.4498585072515032e-07, "loss": 0.3, "step": 7100 }, { "epoch": 2.17, "learning_rate": 1.4472055182171913e-07, "loss": 0.2502, "step": 7110 }, { "epoch": 2.17, "learning_rate": 1.4445525291828795e-07, "loss": 0.2659, "step": 7120 }, { "epoch": 2.17, "learning_rate": 1.4418995401485673e-07, "loss": 0.2379, "step": 7130 }, { "epoch": 2.17, "learning_rate": 1.4392465511142552e-07, "loss": 0.3465, "step": 7140 }, { "epoch": 2.17, "learning_rate": 1.4365935620799434e-07, "loss": 0.2112, "step": 7150 }, { "epoch": 2.17, "learning_rate": 1.4339405730456313e-07, "loss": 0.2263, "step": 7160 }, { "epoch": 2.17, "learning_rate": 1.4312875840113194e-07, "loss": 0.2231, "step": 7170 }, { "epoch": 2.17, "learning_rate": 1.4286345949770073e-07, "loss": 0.3545, "step": 7180 }, { "epoch": 2.17, "learning_rate": 1.4259816059426954e-07, "loss": 0.3163, "step": 7190 }, { "epoch": 2.17, "learning_rate": 1.4233286169083833e-07, "loss": 0.4264, "step": 7200 }, { "epoch": 2.17, "learning_rate": 1.4206756278740715e-07, "loss": 0.1768, "step": 7210 }, { "epoch": 2.17, "learning_rate": 1.4180226388397596e-07, "loss": 0.3796, "step": 7220 }, { "epoch": 2.18, "learning_rate": 1.4153696498054475e-07, "loss": 0.3193, "step": 7230 }, { "epoch": 2.18, "learning_rate": 1.4127166607711354e-07, "loss": 0.531, "step": 7240 }, { "epoch": 2.18, "learning_rate": 1.4100636717368235e-07, "loss": 0.2121, "step": 7250 }, { "epoch": 2.18, "learning_rate": 1.4074106827025114e-07, "loss": 0.3917, "step": 7260 }, { "epoch": 2.18, "learning_rate": 1.4047576936681996e-07, "loss": 0.0989, "step": 7270 }, { "epoch": 2.18, "learning_rate": 1.4021047046338874e-07, "loss": 0.5345, "step": 7280 }, { "epoch": 2.18, "learning_rate": 1.3994517155995753e-07, "loss": 0.1947, "step": 7290 }, { "epoch": 2.18, "learning_rate": 1.3967987265652635e-07, "loss": 0.3083, "step": 7300 }, { "epoch": 2.18, "learning_rate": 1.3941457375309516e-07, "loss": 0.4358, "step": 7310 }, { "epoch": 2.18, "learning_rate": 1.3914927484966395e-07, "loss": 0.1397, "step": 7320 }, { "epoch": 2.18, "learning_rate": 1.3888397594623274e-07, "loss": 0.3461, "step": 7330 }, { "epoch": 2.18, "learning_rate": 1.3861867704280155e-07, "loss": 0.237, "step": 7340 }, { "epoch": 2.18, "learning_rate": 1.3835337813937034e-07, "loss": 0.3058, "step": 7350 }, { "epoch": 2.19, "learning_rate": 1.3808807923593916e-07, "loss": 0.2037, "step": 7360 }, { "epoch": 2.19, "learning_rate": 1.3782278033250797e-07, "loss": 0.3189, "step": 7370 }, { "epoch": 2.19, "learning_rate": 1.3755748142907676e-07, "loss": 0.2856, "step": 7380 }, { "epoch": 2.19, "learning_rate": 1.3729218252564555e-07, "loss": 0.3189, "step": 7390 }, { "epoch": 2.19, "learning_rate": 1.3702688362221436e-07, "loss": 0.27, "step": 7400 }, { "epoch": 2.19, "learning_rate": 1.3676158471878315e-07, "loss": 0.3278, "step": 7410 }, { "epoch": 2.19, "learning_rate": 1.3649628581535196e-07, "loss": 0.5432, "step": 7420 }, { "epoch": 2.19, "learning_rate": 1.3623098691192075e-07, "loss": 0.3994, "step": 7430 }, { "epoch": 2.19, "learning_rate": 1.3596568800848954e-07, "loss": 0.1706, "step": 7440 }, { "epoch": 2.19, "learning_rate": 1.3570038910505836e-07, "loss": 0.2554, "step": 7450 }, { "epoch": 2.19, "learning_rate": 1.3543509020162717e-07, "loss": 0.3261, "step": 7460 }, { "epoch": 2.19, "learning_rate": 1.3516979129819596e-07, "loss": 0.4152, "step": 7470 }, { "epoch": 2.2, "learning_rate": 1.3490449239476477e-07, "loss": 0.4484, "step": 7480 }, { "epoch": 2.2, "learning_rate": 1.3463919349133356e-07, "loss": 0.5551, "step": 7490 }, { "epoch": 2.2, "learning_rate": 1.3437389458790235e-07, "loss": 0.5414, "step": 7500 }, { "epoch": 2.2, "learning_rate": 1.3410859568447116e-07, "loss": 0.3728, "step": 7510 }, { "epoch": 2.2, "learning_rate": 1.3384329678103998e-07, "loss": 0.576, "step": 7520 }, { "epoch": 2.2, "learning_rate": 1.3357799787760877e-07, "loss": 0.5139, "step": 7530 }, { "epoch": 2.2, "learning_rate": 1.3331269897417756e-07, "loss": 0.2673, "step": 7540 }, { "epoch": 2.2, "eval_accuracy": 0.7992943943551548, "eval_loss": 0.4343836009502411, "eval_runtime": 667.7031, "eval_samples_per_second": 3.821, "eval_steps_per_second": 0.956, "step": 7542 }, { "epoch": 3.0, "learning_rate": 1.3304740007074637e-07, "loss": 0.3858, "step": 7550 }, { "epoch": 3.0, "learning_rate": 1.3278210116731516e-07, "loss": 0.4057, "step": 7560 }, { "epoch": 3.0, "learning_rate": 1.3251680226388397e-07, "loss": 0.2731, "step": 7570 }, { "epoch": 3.0, "learning_rate": 1.322515033604528e-07, "loss": 0.3348, "step": 7580 }, { "epoch": 3.0, "learning_rate": 1.3198620445702158e-07, "loss": 0.1794, "step": 7590 }, { "epoch": 3.0, "learning_rate": 1.3172090555359037e-07, "loss": 0.2986, "step": 7600 }, { "epoch": 3.01, "learning_rate": 1.3145560665015918e-07, "loss": 0.3266, "step": 7610 }, { "epoch": 3.01, "learning_rate": 1.3119030774672797e-07, "loss": 0.3126, "step": 7620 }, { "epoch": 3.01, "learning_rate": 1.3092500884329678e-07, "loss": 0.3153, "step": 7630 }, { "epoch": 3.01, "learning_rate": 1.3065970993986557e-07, "loss": 0.3477, "step": 7640 }, { "epoch": 3.01, "learning_rate": 1.3039441103643436e-07, "loss": 0.3177, "step": 7650 }, { "epoch": 3.01, "learning_rate": 1.3012911213300317e-07, "loss": 0.2935, "step": 7660 }, { "epoch": 3.01, "learning_rate": 1.29863813229572e-07, "loss": 0.3827, "step": 7670 }, { "epoch": 3.01, "learning_rate": 1.2959851432614078e-07, "loss": 0.3086, "step": 7680 }, { "epoch": 3.01, "learning_rate": 1.293332154227096e-07, "loss": 0.2633, "step": 7690 }, { "epoch": 3.01, "learning_rate": 1.2906791651927838e-07, "loss": 0.2911, "step": 7700 }, { "epoch": 3.01, "learning_rate": 1.2880261761584717e-07, "loss": 0.3615, "step": 7710 }, { "epoch": 3.01, "learning_rate": 1.2853731871241598e-07, "loss": 0.4242, "step": 7720 }, { "epoch": 3.01, "learning_rate": 1.282720198089848e-07, "loss": 0.2024, "step": 7730 }, { "epoch": 3.02, "learning_rate": 1.2800672090555359e-07, "loss": 0.4539, "step": 7740 }, { "epoch": 3.02, "learning_rate": 1.2774142200212237e-07, "loss": 0.2429, "step": 7750 }, { "epoch": 3.02, "learning_rate": 1.274761230986912e-07, "loss": 0.2325, "step": 7760 }, { "epoch": 3.02, "learning_rate": 1.2721082419525998e-07, "loss": 0.4162, "step": 7770 }, { "epoch": 3.02, "learning_rate": 1.269455252918288e-07, "loss": 0.3089, "step": 7780 }, { "epoch": 3.02, "learning_rate": 1.2668022638839758e-07, "loss": 0.4546, "step": 7790 }, { "epoch": 3.02, "learning_rate": 1.264149274849664e-07, "loss": 0.2543, "step": 7800 }, { "epoch": 3.02, "learning_rate": 1.2614962858153518e-07, "loss": 0.377, "step": 7810 }, { "epoch": 3.02, "learning_rate": 1.25884329678104e-07, "loss": 0.3386, "step": 7820 }, { "epoch": 3.02, "learning_rate": 1.2561903077467279e-07, "loss": 0.3492, "step": 7830 }, { "epoch": 3.02, "learning_rate": 1.253537318712416e-07, "loss": 0.275, "step": 7840 }, { "epoch": 3.02, "learning_rate": 1.250884329678104e-07, "loss": 0.3005, "step": 7850 }, { "epoch": 3.03, "learning_rate": 1.2482313406437918e-07, "loss": 0.301, "step": 7860 }, { "epoch": 3.03, "learning_rate": 1.24557835160948e-07, "loss": 0.3116, "step": 7870 }, { "epoch": 3.03, "learning_rate": 1.242925362575168e-07, "loss": 0.6061, "step": 7880 }, { "epoch": 3.03, "learning_rate": 1.240272373540856e-07, "loss": 0.2186, "step": 7890 }, { "epoch": 3.03, "learning_rate": 1.2376193845065438e-07, "loss": 0.2526, "step": 7900 }, { "epoch": 3.03, "learning_rate": 1.234966395472232e-07, "loss": 0.3165, "step": 7910 }, { "epoch": 3.03, "learning_rate": 1.2323134064379199e-07, "loss": 0.1801, "step": 7920 }, { "epoch": 3.03, "learning_rate": 1.229660417403608e-07, "loss": 0.4456, "step": 7930 }, { "epoch": 3.03, "learning_rate": 1.2270074283692962e-07, "loss": 0.1817, "step": 7940 }, { "epoch": 3.03, "learning_rate": 1.224354439334984e-07, "loss": 0.3027, "step": 7950 }, { "epoch": 3.03, "learning_rate": 1.221701450300672e-07, "loss": 0.2641, "step": 7960 }, { "epoch": 3.03, "learning_rate": 1.21904846126636e-07, "loss": 0.1637, "step": 7970 }, { "epoch": 3.03, "learning_rate": 1.216395472232048e-07, "loss": 0.4759, "step": 7980 }, { "epoch": 3.04, "learning_rate": 1.213742483197736e-07, "loss": 0.2518, "step": 7990 }, { "epoch": 3.04, "learning_rate": 1.211089494163424e-07, "loss": 0.3353, "step": 8000 }, { "epoch": 3.04, "learning_rate": 1.2084365051291121e-07, "loss": 0.2325, "step": 8010 }, { "epoch": 3.04, "learning_rate": 1.2057835160948e-07, "loss": 0.2963, "step": 8020 }, { "epoch": 3.04, "learning_rate": 1.2031305270604882e-07, "loss": 0.3176, "step": 8030 }, { "epoch": 3.04, "learning_rate": 1.2004775380261763e-07, "loss": 0.2076, "step": 8040 }, { "epoch": 3.04, "learning_rate": 1.1978245489918642e-07, "loss": 0.3213, "step": 8050 }, { "epoch": 3.04, "learning_rate": 1.195171559957552e-07, "loss": 0.2378, "step": 8060 }, { "epoch": 3.04, "learning_rate": 1.1925185709232402e-07, "loss": 0.1516, "step": 8070 }, { "epoch": 3.04, "learning_rate": 1.1898655818889281e-07, "loss": 0.2366, "step": 8080 }, { "epoch": 3.04, "learning_rate": 1.1872125928546161e-07, "loss": 0.4461, "step": 8090 }, { "epoch": 3.04, "learning_rate": 1.1845596038203043e-07, "loss": 0.402, "step": 8100 }, { "epoch": 3.05, "learning_rate": 1.1819066147859922e-07, "loss": 0.477, "step": 8110 }, { "epoch": 3.05, "learning_rate": 1.1792536257516802e-07, "loss": 0.331, "step": 8120 }, { "epoch": 3.05, "learning_rate": 1.1766006367173683e-07, "loss": 0.1779, "step": 8130 }, { "epoch": 3.05, "learning_rate": 1.1739476476830562e-07, "loss": 0.2126, "step": 8140 }, { "epoch": 3.05, "learning_rate": 1.1712946586487442e-07, "loss": 0.3229, "step": 8150 }, { "epoch": 3.05, "learning_rate": 1.1686416696144322e-07, "loss": 0.3967, "step": 8160 }, { "epoch": 3.05, "learning_rate": 1.1659886805801201e-07, "loss": 0.12, "step": 8170 }, { "epoch": 3.05, "learning_rate": 1.1633356915458083e-07, "loss": 0.2832, "step": 8180 }, { "epoch": 3.05, "learning_rate": 1.1606827025114963e-07, "loss": 0.2127, "step": 8190 }, { "epoch": 3.05, "learning_rate": 1.1580297134771842e-07, "loss": 0.2145, "step": 8200 }, { "epoch": 3.05, "learning_rate": 1.1553767244428723e-07, "loss": 0.2717, "step": 8210 }, { "epoch": 3.05, "learning_rate": 1.1527237354085603e-07, "loss": 0.2418, "step": 8220 }, { "epoch": 3.05, "learning_rate": 1.1500707463742482e-07, "loss": 0.2614, "step": 8230 }, { "epoch": 3.06, "learning_rate": 1.1474177573399363e-07, "loss": 0.1934, "step": 8240 }, { "epoch": 3.06, "learning_rate": 1.1447647683056244e-07, "loss": 0.4718, "step": 8250 }, { "epoch": 3.06, "learning_rate": 1.1421117792713122e-07, "loss": 0.2791, "step": 8260 }, { "epoch": 3.06, "learning_rate": 1.1394587902370003e-07, "loss": 0.3744, "step": 8270 }, { "epoch": 3.06, "learning_rate": 1.1368058012026884e-07, "loss": 0.4266, "step": 8280 }, { "epoch": 3.06, "learning_rate": 1.1341528121683763e-07, "loss": 0.2921, "step": 8290 }, { "epoch": 3.06, "learning_rate": 1.1314998231340643e-07, "loss": 0.3229, "step": 8300 }, { "epoch": 3.06, "learning_rate": 1.1288468340997525e-07, "loss": 0.3329, "step": 8310 }, { "epoch": 3.06, "learning_rate": 1.1261938450654403e-07, "loss": 0.1501, "step": 8320 }, { "epoch": 3.06, "learning_rate": 1.1235408560311284e-07, "loss": 0.3098, "step": 8330 }, { "epoch": 3.06, "learning_rate": 1.1208878669968164e-07, "loss": 0.3795, "step": 8340 }, { "epoch": 3.06, "learning_rate": 1.1182348779625044e-07, "loss": 0.2139, "step": 8350 }, { "epoch": 3.07, "learning_rate": 1.1155818889281924e-07, "loss": 0.2172, "step": 8360 }, { "epoch": 3.07, "learning_rate": 1.1129288998938804e-07, "loss": 0.4199, "step": 8370 }, { "epoch": 3.07, "learning_rate": 1.1102759108595683e-07, "loss": 0.1432, "step": 8380 }, { "epoch": 3.07, "learning_rate": 1.1076229218252564e-07, "loss": 0.2769, "step": 8390 }, { "epoch": 3.07, "learning_rate": 1.1049699327909445e-07, "loss": 0.4188, "step": 8400 }, { "epoch": 3.07, "learning_rate": 1.1023169437566323e-07, "loss": 0.5459, "step": 8410 }, { "epoch": 3.07, "learning_rate": 1.0996639547223205e-07, "loss": 0.5241, "step": 8420 }, { "epoch": 3.07, "learning_rate": 1.0970109656880085e-07, "loss": 0.573, "step": 8430 }, { "epoch": 3.07, "learning_rate": 1.0943579766536964e-07, "loss": 0.2892, "step": 8440 }, { "epoch": 3.07, "learning_rate": 1.0917049876193844e-07, "loss": 0.2946, "step": 8450 }, { "epoch": 3.07, "learning_rate": 1.0890519985850725e-07, "loss": 0.2701, "step": 8460 }, { "epoch": 3.07, "learning_rate": 1.0863990095507604e-07, "loss": 0.3341, "step": 8470 }, { "epoch": 3.07, "learning_rate": 1.0837460205164484e-07, "loss": 0.347, "step": 8480 }, { "epoch": 3.08, "learning_rate": 1.0810930314821366e-07, "loss": 0.3488, "step": 8490 }, { "epoch": 3.08, "learning_rate": 1.0784400424478245e-07, "loss": 0.2516, "step": 8500 }, { "epoch": 3.08, "learning_rate": 1.0757870534135125e-07, "loss": 0.4856, "step": 8510 }, { "epoch": 3.08, "learning_rate": 1.0731340643792005e-07, "loss": 0.3262, "step": 8520 }, { "epoch": 3.08, "learning_rate": 1.0704810753448885e-07, "loss": 0.4792, "step": 8530 }, { "epoch": 3.08, "learning_rate": 1.0678280863105765e-07, "loss": 0.3977, "step": 8540 }, { "epoch": 3.08, "learning_rate": 1.0651750972762645e-07, "loss": 0.247, "step": 8550 }, { "epoch": 3.08, "learning_rate": 1.0625221082419524e-07, "loss": 0.5944, "step": 8560 }, { "epoch": 3.08, "learning_rate": 1.0598691192076406e-07, "loss": 0.4538, "step": 8570 }, { "epoch": 3.08, "learning_rate": 1.0572161301733286e-07, "loss": 0.322, "step": 8580 }, { "epoch": 3.08, "learning_rate": 1.0545631411390165e-07, "loss": 0.8246, "step": 8590 }, { "epoch": 3.08, "learning_rate": 1.0519101521047046e-07, "loss": 0.2921, "step": 8600 }, { "epoch": 3.08, "learning_rate": 1.0492571630703926e-07, "loss": 0.2552, "step": 8610 }, { "epoch": 3.09, "learning_rate": 1.0466041740360805e-07, "loss": 0.2207, "step": 8620 }, { "epoch": 3.09, "learning_rate": 1.0439511850017685e-07, "loss": 0.136, "step": 8630 }, { "epoch": 3.09, "learning_rate": 1.0412981959674567e-07, "loss": 0.5574, "step": 8640 }, { "epoch": 3.09, "learning_rate": 1.0386452069331446e-07, "loss": 0.2432, "step": 8650 }, { "epoch": 3.09, "learning_rate": 1.0359922178988326e-07, "loss": 0.4907, "step": 8660 }, { "epoch": 3.09, "learning_rate": 1.0333392288645207e-07, "loss": 0.2984, "step": 8670 }, { "epoch": 3.09, "learning_rate": 1.0306862398302086e-07, "loss": 0.3477, "step": 8680 }, { "epoch": 3.09, "learning_rate": 1.0280332507958966e-07, "loss": 0.4005, "step": 8690 }, { "epoch": 3.09, "learning_rate": 1.0253802617615848e-07, "loss": 0.4083, "step": 8700 }, { "epoch": 3.09, "learning_rate": 1.0227272727272727e-07, "loss": 0.3129, "step": 8710 }, { "epoch": 3.09, "learning_rate": 1.0200742836929607e-07, "loss": 0.4127, "step": 8720 }, { "epoch": 3.09, "learning_rate": 1.0174212946586487e-07, "loss": 0.1239, "step": 8730 }, { "epoch": 3.1, "learning_rate": 1.0147683056243366e-07, "loss": 0.1863, "step": 8740 }, { "epoch": 3.1, "learning_rate": 1.0121153165900247e-07, "loss": 0.3504, "step": 8750 }, { "epoch": 3.1, "learning_rate": 1.0094623275557127e-07, "loss": 0.3059, "step": 8760 }, { "epoch": 3.1, "learning_rate": 1.0068093385214006e-07, "loss": 0.1524, "step": 8770 }, { "epoch": 3.1, "learning_rate": 1.0041563494870888e-07, "loss": 0.38, "step": 8780 }, { "epoch": 3.1, "learning_rate": 1.0015033604527768e-07, "loss": 0.3079, "step": 8790 }, { "epoch": 3.1, "learning_rate": 9.988503714184647e-08, "loss": 0.3288, "step": 8800 }, { "epoch": 3.1, "learning_rate": 9.961973823841527e-08, "loss": 0.554, "step": 8810 }, { "epoch": 3.1, "learning_rate": 9.935443933498408e-08, "loss": 0.2589, "step": 8820 }, { "epoch": 3.1, "learning_rate": 9.908914043155288e-08, "loss": 0.2241, "step": 8830 }, { "epoch": 3.1, "learning_rate": 9.882384152812167e-08, "loss": 0.291, "step": 8840 }, { "epoch": 3.1, "learning_rate": 9.855854262469049e-08, "loss": 0.1931, "step": 8850 }, { "epoch": 3.1, "learning_rate": 9.829324372125929e-08, "loss": 0.2891, "step": 8860 }, { "epoch": 3.11, "learning_rate": 9.802794481782808e-08, "loss": 0.301, "step": 8870 }, { "epoch": 3.11, "learning_rate": 9.776264591439689e-08, "loss": 0.1596, "step": 8880 }, { "epoch": 3.11, "learning_rate": 9.749734701096569e-08, "loss": 0.261, "step": 8890 }, { "epoch": 3.11, "learning_rate": 9.723204810753448e-08, "loss": 0.2761, "step": 8900 }, { "epoch": 3.11, "learning_rate": 9.696674920410328e-08, "loss": 0.2916, "step": 8910 }, { "epoch": 3.11, "learning_rate": 9.67014503006721e-08, "loss": 0.4751, "step": 8920 }, { "epoch": 3.11, "learning_rate": 9.643615139724089e-08, "loss": 0.2561, "step": 8930 }, { "epoch": 3.11, "learning_rate": 9.617085249380969e-08, "loss": 0.1122, "step": 8940 }, { "epoch": 3.11, "learning_rate": 9.59055535903785e-08, "loss": 0.1563, "step": 8950 }, { "epoch": 3.11, "learning_rate": 9.564025468694729e-08, "loss": 0.2828, "step": 8960 }, { "epoch": 3.11, "learning_rate": 9.537495578351609e-08, "loss": 0.3116, "step": 8970 }, { "epoch": 3.11, "learning_rate": 9.510965688008489e-08, "loss": 0.1198, "step": 8980 }, { "epoch": 3.12, "learning_rate": 9.48443579766537e-08, "loss": 0.2329, "step": 8990 }, { "epoch": 3.12, "learning_rate": 9.45790590732225e-08, "loss": 0.2435, "step": 9000 }, { "epoch": 3.12, "learning_rate": 9.43137601697913e-08, "loss": 0.2814, "step": 9010 }, { "epoch": 3.12, "learning_rate": 9.404846126636009e-08, "loss": 0.4588, "step": 9020 }, { "epoch": 3.12, "learning_rate": 9.37831623629289e-08, "loss": 0.3259, "step": 9030 }, { "epoch": 3.12, "learning_rate": 9.35178634594977e-08, "loss": 0.2281, "step": 9040 }, { "epoch": 3.12, "learning_rate": 9.325256455606649e-08, "loss": 0.1325, "step": 9050 }, { "epoch": 3.12, "learning_rate": 9.29872656526353e-08, "loss": 0.1756, "step": 9060 }, { "epoch": 3.12, "learning_rate": 9.27219667492041e-08, "loss": 0.4611, "step": 9070 }, { "epoch": 3.12, "learning_rate": 9.24566678457729e-08, "loss": 0.3821, "step": 9080 }, { "epoch": 3.12, "learning_rate": 9.21913689423417e-08, "loss": 0.4035, "step": 9090 }, { "epoch": 3.12, "learning_rate": 9.192607003891051e-08, "loss": 0.2634, "step": 9100 }, { "epoch": 3.12, "learning_rate": 9.16607711354793e-08, "loss": 0.5165, "step": 9110 }, { "epoch": 3.13, "learning_rate": 9.13954722320481e-08, "loss": 0.2734, "step": 9120 }, { "epoch": 3.13, "learning_rate": 9.113017332861692e-08, "loss": 0.3244, "step": 9130 }, { "epoch": 3.13, "learning_rate": 9.08648744251857e-08, "loss": 0.5966, "step": 9140 }, { "epoch": 3.13, "learning_rate": 9.05995755217545e-08, "loss": 0.2808, "step": 9150 }, { "epoch": 3.13, "learning_rate": 9.033427661832331e-08, "loss": 0.0861, "step": 9160 }, { "epoch": 3.13, "learning_rate": 9.006897771489211e-08, "loss": 0.5532, "step": 9170 }, { "epoch": 3.13, "learning_rate": 8.980367881146091e-08, "loss": 0.1335, "step": 9180 }, { "epoch": 3.13, "learning_rate": 8.953837990802971e-08, "loss": 0.2124, "step": 9190 }, { "epoch": 3.13, "learning_rate": 8.92730810045985e-08, "loss": 0.3196, "step": 9200 }, { "epoch": 3.13, "learning_rate": 8.900778210116731e-08, "loss": 0.3537, "step": 9210 }, { "epoch": 3.13, "learning_rate": 8.874248319773612e-08, "loss": 0.2052, "step": 9220 }, { "epoch": 3.13, "learning_rate": 8.84771842943049e-08, "loss": 0.2914, "step": 9230 }, { "epoch": 3.14, "learning_rate": 8.821188539087372e-08, "loss": 0.201, "step": 9240 }, { "epoch": 3.14, "learning_rate": 8.794658648744252e-08, "loss": 0.5735, "step": 9250 }, { "epoch": 3.14, "learning_rate": 8.768128758401131e-08, "loss": 0.3856, "step": 9260 }, { "epoch": 3.14, "learning_rate": 8.741598868058011e-08, "loss": 0.3567, "step": 9270 }, { "epoch": 3.14, "learning_rate": 8.715068977714892e-08, "loss": 0.1276, "step": 9280 }, { "epoch": 3.14, "learning_rate": 8.688539087371771e-08, "loss": 0.2726, "step": 9290 }, { "epoch": 3.14, "learning_rate": 8.662009197028651e-08, "loss": 0.1824, "step": 9300 }, { "epoch": 3.14, "learning_rate": 8.635479306685533e-08, "loss": 0.2436, "step": 9310 }, { "epoch": 3.14, "learning_rate": 8.608949416342412e-08, "loss": 0.1757, "step": 9320 }, { "epoch": 3.14, "learning_rate": 8.582419525999292e-08, "loss": 0.115, "step": 9330 }, { "epoch": 3.14, "learning_rate": 8.555889635656172e-08, "loss": 0.3133, "step": 9340 }, { "epoch": 3.14, "learning_rate": 8.529359745313052e-08, "loss": 0.3231, "step": 9350 }, { "epoch": 3.14, "learning_rate": 8.502829854969932e-08, "loss": 0.3843, "step": 9360 }, { "epoch": 3.15, "learning_rate": 8.476299964626813e-08, "loss": 0.3218, "step": 9370 }, { "epoch": 3.15, "learning_rate": 8.449770074283691e-08, "loss": 0.2972, "step": 9380 }, { "epoch": 3.15, "learning_rate": 8.423240183940573e-08, "loss": 0.1626, "step": 9390 }, { "epoch": 3.15, "learning_rate": 8.396710293597453e-08, "loss": 0.3293, "step": 9400 }, { "epoch": 3.15, "learning_rate": 8.370180403254332e-08, "loss": 0.1546, "step": 9410 }, { "epoch": 3.15, "learning_rate": 8.343650512911213e-08, "loss": 0.2222, "step": 9420 }, { "epoch": 3.15, "learning_rate": 8.317120622568093e-08, "loss": 0.5815, "step": 9430 }, { "epoch": 3.15, "learning_rate": 8.290590732224972e-08, "loss": 0.4438, "step": 9440 }, { "epoch": 3.15, "learning_rate": 8.264060841881852e-08, "loss": 0.1896, "step": 9450 }, { "epoch": 3.15, "learning_rate": 8.237530951538734e-08, "loss": 0.5805, "step": 9460 }, { "epoch": 3.15, "learning_rate": 8.211001061195613e-08, "loss": 0.5619, "step": 9470 }, { "epoch": 3.15, "learning_rate": 8.184471170852493e-08, "loss": 0.256, "step": 9480 }, { "epoch": 3.16, "learning_rate": 8.157941280509374e-08, "loss": 0.1082, "step": 9490 }, { "epoch": 3.16, "learning_rate": 8.131411390166253e-08, "loss": 0.4024, "step": 9500 }, { "epoch": 3.16, "learning_rate": 8.104881499823133e-08, "loss": 0.4838, "step": 9510 }, { "epoch": 3.16, "learning_rate": 8.078351609480015e-08, "loss": 0.4411, "step": 9520 }, { "epoch": 3.16, "learning_rate": 8.051821719136894e-08, "loss": 0.4502, "step": 9530 }, { "epoch": 3.16, "learning_rate": 8.025291828793774e-08, "loss": 0.3072, "step": 9540 }, { "epoch": 3.16, "learning_rate": 7.998761938450654e-08, "loss": 0.0834, "step": 9550 }, { "epoch": 3.16, "learning_rate": 7.972232048107533e-08, "loss": 0.2351, "step": 9560 }, { "epoch": 3.16, "learning_rate": 7.945702157764414e-08, "loss": 0.293, "step": 9570 }, { "epoch": 3.16, "learning_rate": 7.919172267421294e-08, "loss": 0.4146, "step": 9580 }, { "epoch": 3.16, "learning_rate": 7.892642377078173e-08, "loss": 0.2291, "step": 9590 }, { "epoch": 3.16, "learning_rate": 7.866112486735055e-08, "loss": 0.1389, "step": 9600 }, { "epoch": 3.16, "learning_rate": 7.839582596391935e-08, "loss": 0.2594, "step": 9610 }, { "epoch": 3.17, "learning_rate": 7.813052706048814e-08, "loss": 0.5084, "step": 9620 }, { "epoch": 3.17, "learning_rate": 7.786522815705695e-08, "loss": 0.4067, "step": 9630 }, { "epoch": 3.17, "learning_rate": 7.759992925362575e-08, "loss": 0.3945, "step": 9640 }, { "epoch": 3.17, "learning_rate": 7.733463035019454e-08, "loss": 0.3778, "step": 9650 }, { "epoch": 3.17, "learning_rate": 7.706933144676334e-08, "loss": 0.2721, "step": 9660 }, { "epoch": 3.17, "learning_rate": 7.680403254333216e-08, "loss": 0.304, "step": 9670 }, { "epoch": 3.17, "learning_rate": 7.653873363990096e-08, "loss": 0.2496, "step": 9680 }, { "epoch": 3.17, "learning_rate": 7.627343473646975e-08, "loss": 0.3475, "step": 9690 }, { "epoch": 3.17, "learning_rate": 7.600813583303856e-08, "loss": 0.2106, "step": 9700 }, { "epoch": 3.17, "learning_rate": 7.574283692960736e-08, "loss": 0.4637, "step": 9710 }, { "epoch": 3.17, "learning_rate": 7.547753802617615e-08, "loss": 0.2955, "step": 9720 }, { "epoch": 3.17, "learning_rate": 7.521223912274495e-08, "loss": 0.4538, "step": 9730 }, { "epoch": 3.17, "learning_rate": 7.494694021931375e-08, "loss": 0.4784, "step": 9740 }, { "epoch": 3.18, "learning_rate": 7.468164131588256e-08, "loss": 0.2853, "step": 9750 }, { "epoch": 3.18, "learning_rate": 7.441634241245136e-08, "loss": 0.1619, "step": 9760 }, { "epoch": 3.18, "learning_rate": 7.415104350902016e-08, "loss": 0.4363, "step": 9770 }, { "epoch": 3.18, "learning_rate": 7.388574460558896e-08, "loss": 0.3628, "step": 9780 }, { "epoch": 3.18, "learning_rate": 7.362044570215776e-08, "loss": 0.5235, "step": 9790 }, { "epoch": 3.18, "learning_rate": 7.335514679872656e-08, "loss": 0.276, "step": 9800 }, { "epoch": 3.18, "learning_rate": 7.308984789529536e-08, "loss": 0.3532, "step": 9810 }, { "epoch": 3.18, "learning_rate": 7.282454899186417e-08, "loss": 0.3284, "step": 9820 }, { "epoch": 3.18, "learning_rate": 7.255925008843297e-08, "loss": 0.305, "step": 9830 }, { "epoch": 3.18, "learning_rate": 7.229395118500176e-08, "loss": 0.1895, "step": 9840 }, { "epoch": 3.18, "learning_rate": 7.202865228157057e-08, "loss": 0.6018, "step": 9850 }, { "epoch": 3.18, "learning_rate": 7.176335337813937e-08, "loss": 0.277, "step": 9860 }, { "epoch": 3.19, "learning_rate": 7.149805447470817e-08, "loss": 0.1618, "step": 9870 }, { "epoch": 3.19, "learning_rate": 7.123275557127698e-08, "loss": 0.1706, "step": 9880 }, { "epoch": 3.19, "learning_rate": 7.096745666784576e-08, "loss": 0.1546, "step": 9890 }, { "epoch": 3.19, "learning_rate": 7.070215776441458e-08, "loss": 0.2086, "step": 9900 }, { "epoch": 3.19, "learning_rate": 7.043685886098337e-08, "loss": 0.2052, "step": 9910 }, { "epoch": 3.19, "learning_rate": 7.017155995755217e-08, "loss": 0.2529, "step": 9920 }, { "epoch": 3.19, "learning_rate": 6.990626105412098e-08, "loss": 0.2568, "step": 9930 }, { "epoch": 3.19, "learning_rate": 6.964096215068977e-08, "loss": 0.5543, "step": 9940 }, { "epoch": 3.19, "learning_rate": 6.937566324725857e-08, "loss": 0.4673, "step": 9950 }, { "epoch": 3.19, "learning_rate": 6.911036434382737e-08, "loss": 0.2104, "step": 9960 }, { "epoch": 3.19, "learning_rate": 6.884506544039618e-08, "loss": 0.1235, "step": 9970 }, { "epoch": 3.19, "learning_rate": 6.857976653696498e-08, "loss": 0.3148, "step": 9980 }, { "epoch": 3.19, "learning_rate": 6.831446763353378e-08, "loss": 0.2482, "step": 9990 }, { "epoch": 3.2, "learning_rate": 6.804916873010258e-08, "loss": 0.2479, "step": 10000 }, { "epoch": 3.2, "learning_rate": 6.778386982667138e-08, "loss": 0.4603, "step": 10010 }, { "epoch": 3.2, "learning_rate": 6.751857092324018e-08, "loss": 0.181, "step": 10020 }, { "epoch": 3.2, "learning_rate": 6.725327201980898e-08, "loss": 0.0648, "step": 10030 }, { "epoch": 3.2, "learning_rate": 6.698797311637779e-08, "loss": 0.1411, "step": 10040 }, { "epoch": 3.2, "learning_rate": 6.672267421294659e-08, "loss": 0.3185, "step": 10050 }, { "epoch": 3.2, "eval_accuracy": 0.8200705605644845, "eval_loss": 0.4328227639198303, "eval_runtime": 675.1569, "eval_samples_per_second": 3.778, "eval_steps_per_second": 0.945, "step": 10056 }, { "epoch": 4.0, "learning_rate": 6.645737530951539e-08, "loss": 0.3281, "step": 10060 }, { "epoch": 4.0, "learning_rate": 6.619207640608418e-08, "loss": 0.1367, "step": 10070 }, { "epoch": 4.0, "learning_rate": 6.592677750265299e-08, "loss": 0.1347, "step": 10080 }, { "epoch": 4.0, "learning_rate": 6.566147859922178e-08, "loss": 0.2287, "step": 10090 }, { "epoch": 4.0, "learning_rate": 6.539617969579058e-08, "loss": 0.079, "step": 10100 }, { "epoch": 4.0, "learning_rate": 6.51308807923594e-08, "loss": 0.4218, "step": 10110 }, { "epoch": 4.01, "learning_rate": 6.486558188892818e-08, "loss": 0.1522, "step": 10120 }, { "epoch": 4.01, "learning_rate": 6.460028298549699e-08, "loss": 0.2411, "step": 10130 }, { "epoch": 4.01, "learning_rate": 6.433498408206579e-08, "loss": 0.2314, "step": 10140 }, { "epoch": 4.01, "learning_rate": 6.406968517863459e-08, "loss": 0.4938, "step": 10150 }, { "epoch": 4.01, "learning_rate": 6.380438627520339e-08, "loss": 0.1713, "step": 10160 }, { "epoch": 4.01, "learning_rate": 6.353908737177219e-08, "loss": 0.3614, "step": 10170 }, { "epoch": 4.01, "learning_rate": 6.3273788468341e-08, "loss": 0.2386, "step": 10180 }, { "epoch": 4.01, "learning_rate": 6.30084895649098e-08, "loss": 0.4071, "step": 10190 }, { "epoch": 4.01, "learning_rate": 6.27431906614786e-08, "loss": 0.3515, "step": 10200 }, { "epoch": 4.01, "learning_rate": 6.24778917580474e-08, "loss": 0.3182, "step": 10210 }, { "epoch": 4.01, "learning_rate": 6.22125928546162e-08, "loss": 0.395, "step": 10220 }, { "epoch": 4.01, "learning_rate": 6.1947293951185e-08, "loss": 0.0892, "step": 10230 }, { "epoch": 4.01, "learning_rate": 6.16819950477538e-08, "loss": 0.4639, "step": 10240 }, { "epoch": 4.02, "learning_rate": 6.141669614432259e-08, "loss": 0.2954, "step": 10250 }, { "epoch": 4.02, "learning_rate": 6.11513972408914e-08, "loss": 0.3138, "step": 10260 }, { "epoch": 4.02, "learning_rate": 6.088609833746021e-08, "loss": 0.5433, "step": 10270 }, { "epoch": 4.02, "learning_rate": 6.062079943402901e-08, "loss": 0.2787, "step": 10280 }, { "epoch": 4.02, "learning_rate": 6.035550053059781e-08, "loss": 0.3296, "step": 10290 }, { "epoch": 4.02, "learning_rate": 6.00902016271666e-08, "loss": 0.1484, "step": 10300 }, { "epoch": 4.02, "learning_rate": 5.982490272373541e-08, "loss": 0.2677, "step": 10310 }, { "epoch": 4.02, "learning_rate": 5.955960382030421e-08, "loss": 0.2695, "step": 10320 }, { "epoch": 4.02, "learning_rate": 5.9294304916873e-08, "loss": 0.3003, "step": 10330 }, { "epoch": 4.02, "learning_rate": 5.902900601344181e-08, "loss": 0.1844, "step": 10340 }, { "epoch": 4.02, "learning_rate": 5.876370711001061e-08, "loss": 0.3414, "step": 10350 }, { "epoch": 4.02, "learning_rate": 5.849840820657941e-08, "loss": 0.2815, "step": 10360 }, { "epoch": 4.02, "learning_rate": 5.8233109303148216e-08, "loss": 0.2292, "step": 10370 }, { "epoch": 4.03, "learning_rate": 5.796781039971701e-08, "loss": 0.3515, "step": 10380 }, { "epoch": 4.03, "learning_rate": 5.770251149628581e-08, "loss": 0.3803, "step": 10390 }, { "epoch": 4.03, "learning_rate": 5.7437212592854614e-08, "loss": 0.29, "step": 10400 }, { "epoch": 4.03, "learning_rate": 5.7171913689423415e-08, "loss": 0.3232, "step": 10410 }, { "epoch": 4.03, "learning_rate": 5.690661478599221e-08, "loss": 0.093, "step": 10420 }, { "epoch": 4.03, "learning_rate": 5.664131588256102e-08, "loss": 0.2088, "step": 10430 }, { "epoch": 4.03, "learning_rate": 5.637601697912982e-08, "loss": 0.487, "step": 10440 }, { "epoch": 4.03, "learning_rate": 5.6110718075698615e-08, "loss": 0.2051, "step": 10450 }, { "epoch": 4.03, "learning_rate": 5.584541917226742e-08, "loss": 0.5831, "step": 10460 }, { "epoch": 4.03, "learning_rate": 5.558012026883622e-08, "loss": 0.4019, "step": 10470 }, { "epoch": 4.03, "learning_rate": 5.531482136540502e-08, "loss": 0.3914, "step": 10480 }, { "epoch": 4.03, "learning_rate": 5.504952246197383e-08, "loss": 0.1919, "step": 10490 }, { "epoch": 4.04, "learning_rate": 5.478422355854262e-08, "loss": 0.2466, "step": 10500 }, { "epoch": 4.04, "learning_rate": 5.451892465511142e-08, "loss": 0.2837, "step": 10510 }, { "epoch": 4.04, "learning_rate": 5.4253625751680225e-08, "loss": 0.3293, "step": 10520 }, { "epoch": 4.04, "learning_rate": 5.3988326848249027e-08, "loss": 0.2261, "step": 10530 }, { "epoch": 4.04, "learning_rate": 5.372302794481782e-08, "loss": 0.4414, "step": 10540 }, { "epoch": 4.04, "learning_rate": 5.345772904138663e-08, "loss": 0.4747, "step": 10550 }, { "epoch": 4.04, "learning_rate": 5.3192430137955425e-08, "loss": 0.297, "step": 10560 }, { "epoch": 4.04, "learning_rate": 5.2927131234524226e-08, "loss": 0.4797, "step": 10570 }, { "epoch": 4.04, "learning_rate": 5.2661832331093034e-08, "loss": 0.0799, "step": 10580 }, { "epoch": 4.04, "learning_rate": 5.239653342766183e-08, "loss": 0.1772, "step": 10590 }, { "epoch": 4.04, "learning_rate": 5.213123452423063e-08, "loss": 0.204, "step": 10600 }, { "epoch": 4.04, "learning_rate": 5.186593562079943e-08, "loss": 0.2605, "step": 10610 }, { "epoch": 4.04, "learning_rate": 5.1600636717368234e-08, "loss": 0.2314, "step": 10620 }, { "epoch": 4.05, "learning_rate": 5.133533781393703e-08, "loss": 0.383, "step": 10630 }, { "epoch": 4.05, "learning_rate": 5.1070038910505837e-08, "loss": 0.3802, "step": 10640 }, { "epoch": 4.05, "learning_rate": 5.080474000707463e-08, "loss": 0.155, "step": 10650 }, { "epoch": 4.05, "learning_rate": 5.053944110364343e-08, "loss": 0.4906, "step": 10660 }, { "epoch": 4.05, "learning_rate": 5.027414220021224e-08, "loss": 0.3776, "step": 10670 }, { "epoch": 4.05, "learning_rate": 5.0008843296781036e-08, "loss": 0.5581, "step": 10680 }, { "epoch": 4.05, "learning_rate": 4.974354439334984e-08, "loss": 0.3237, "step": 10690 }, { "epoch": 4.05, "learning_rate": 4.947824548991864e-08, "loss": 0.3431, "step": 10700 }, { "epoch": 4.05, "learning_rate": 4.921294658648744e-08, "loss": 0.241, "step": 10710 }, { "epoch": 4.05, "learning_rate": 4.894764768305624e-08, "loss": 0.389, "step": 10720 }, { "epoch": 4.05, "learning_rate": 4.8682348779625043e-08, "loss": 0.4295, "step": 10730 }, { "epoch": 4.05, "learning_rate": 4.841704987619384e-08, "loss": 0.2456, "step": 10740 }, { "epoch": 4.06, "learning_rate": 4.8151750972762646e-08, "loss": 0.5222, "step": 10750 }, { "epoch": 4.06, "learning_rate": 4.788645206933145e-08, "loss": 0.3246, "step": 10760 }, { "epoch": 4.06, "learning_rate": 4.762115316590024e-08, "loss": 0.4138, "step": 10770 }, { "epoch": 4.06, "learning_rate": 4.735585426246905e-08, "loss": 0.2455, "step": 10780 }, { "epoch": 4.06, "learning_rate": 4.7090555359037846e-08, "loss": 0.1867, "step": 10790 }, { "epoch": 4.06, "learning_rate": 4.682525645560665e-08, "loss": 0.3166, "step": 10800 }, { "epoch": 4.06, "learning_rate": 4.6559957552175455e-08, "loss": 0.1877, "step": 10810 }, { "epoch": 4.06, "learning_rate": 4.629465864874425e-08, "loss": 0.4553, "step": 10820 }, { "epoch": 4.06, "learning_rate": 4.6029359745313045e-08, "loss": 0.1521, "step": 10830 }, { "epoch": 4.06, "learning_rate": 4.5764060841881853e-08, "loss": 0.2166, "step": 10840 }, { "epoch": 4.06, "learning_rate": 4.5498761938450655e-08, "loss": 0.1694, "step": 10850 }, { "epoch": 4.06, "learning_rate": 4.523346303501945e-08, "loss": 0.1315, "step": 10860 }, { "epoch": 4.06, "learning_rate": 4.496816413158826e-08, "loss": 0.4086, "step": 10870 }, { "epoch": 4.07, "learning_rate": 4.470286522815705e-08, "loss": 0.3522, "step": 10880 }, { "epoch": 4.07, "learning_rate": 4.4437566324725854e-08, "loss": 0.1301, "step": 10890 }, { "epoch": 4.07, "learning_rate": 4.417226742129466e-08, "loss": 0.1597, "step": 10900 }, { "epoch": 4.07, "learning_rate": 4.390696851786346e-08, "loss": 0.4818, "step": 10910 }, { "epoch": 4.07, "learning_rate": 4.364166961443226e-08, "loss": 0.2074, "step": 10920 }, { "epoch": 4.07, "learning_rate": 4.337637071100106e-08, "loss": 0.0484, "step": 10930 }, { "epoch": 4.07, "learning_rate": 4.311107180756986e-08, "loss": 0.1093, "step": 10940 }, { "epoch": 4.07, "learning_rate": 4.2845772904138657e-08, "loss": 0.3121, "step": 10950 }, { "epoch": 4.07, "learning_rate": 4.2580474000707465e-08, "loss": 0.1872, "step": 10960 }, { "epoch": 4.07, "learning_rate": 4.231517509727626e-08, "loss": 0.4645, "step": 10970 }, { "epoch": 4.07, "learning_rate": 4.204987619384506e-08, "loss": 0.2252, "step": 10980 }, { "epoch": 4.07, "learning_rate": 4.178457729041387e-08, "loss": 0.3651, "step": 10990 }, { "epoch": 4.08, "learning_rate": 4.1519278386982664e-08, "loss": 0.272, "step": 11000 }, { "epoch": 4.08, "learning_rate": 4.1253979483551466e-08, "loss": 0.408, "step": 11010 }, { "epoch": 4.08, "learning_rate": 4.098868058012027e-08, "loss": 0.2049, "step": 11020 }, { "epoch": 4.08, "learning_rate": 4.072338167668907e-08, "loss": 0.1285, "step": 11030 }, { "epoch": 4.08, "learning_rate": 4.0458082773257864e-08, "loss": 0.4743, "step": 11040 }, { "epoch": 4.08, "learning_rate": 4.019278386982667e-08, "loss": 0.2603, "step": 11050 }, { "epoch": 4.08, "learning_rate": 3.9927484966395467e-08, "loss": 0.5623, "step": 11060 }, { "epoch": 4.08, "learning_rate": 3.966218606296427e-08, "loss": 0.2478, "step": 11070 }, { "epoch": 4.08, "learning_rate": 3.9396887159533076e-08, "loss": 0.2938, "step": 11080 }, { "epoch": 4.08, "learning_rate": 3.913158825610187e-08, "loss": 0.3001, "step": 11090 }, { "epoch": 4.08, "learning_rate": 3.886628935267067e-08, "loss": 0.1743, "step": 11100 }, { "epoch": 4.08, "learning_rate": 3.8600990449239474e-08, "loss": 0.3494, "step": 11110 }, { "epoch": 4.08, "learning_rate": 3.8335691545808276e-08, "loss": 0.3701, "step": 11120 }, { "epoch": 4.09, "learning_rate": 3.8070392642377084e-08, "loss": 0.6493, "step": 11130 }, { "epoch": 4.09, "learning_rate": 3.780509373894588e-08, "loss": 0.2456, "step": 11140 }, { "epoch": 4.09, "learning_rate": 3.7539794835514673e-08, "loss": 0.1634, "step": 11150 }, { "epoch": 4.09, "learning_rate": 3.727449593208348e-08, "loss": 0.2219, "step": 11160 }, { "epoch": 4.09, "learning_rate": 3.700919702865228e-08, "loss": 0.2627, "step": 11170 }, { "epoch": 4.09, "learning_rate": 3.674389812522108e-08, "loss": 0.6152, "step": 11180 }, { "epoch": 4.09, "learning_rate": 3.647859922178988e-08, "loss": 0.273, "step": 11190 }, { "epoch": 4.09, "learning_rate": 3.621330031835868e-08, "loss": 0.2135, "step": 11200 }, { "epoch": 4.09, "learning_rate": 3.594800141492748e-08, "loss": 0.2077, "step": 11210 }, { "epoch": 4.09, "learning_rate": 3.5682702511496284e-08, "loss": 0.6408, "step": 11220 }, { "epoch": 4.09, "learning_rate": 3.5417403608065085e-08, "loss": 0.1691, "step": 11230 }, { "epoch": 4.09, "learning_rate": 3.515210470463389e-08, "loss": 0.4814, "step": 11240 }, { "epoch": 4.1, "learning_rate": 3.488680580120269e-08, "loss": 0.1419, "step": 11250 }, { "epoch": 4.1, "learning_rate": 3.462150689777149e-08, "loss": 0.3493, "step": 11260 }, { "epoch": 4.1, "learning_rate": 3.435620799434029e-08, "loss": 0.5488, "step": 11270 }, { "epoch": 4.1, "learning_rate": 3.4090909090909086e-08, "loss": 0.4774, "step": 11280 }, { "epoch": 4.1, "learning_rate": 3.382561018747789e-08, "loss": 0.3423, "step": 11290 }, { "epoch": 4.1, "learning_rate": 3.356031128404669e-08, "loss": 0.3831, "step": 11300 }, { "epoch": 4.1, "learning_rate": 3.329501238061549e-08, "loss": 0.3576, "step": 11310 }, { "epoch": 4.1, "learning_rate": 3.302971347718429e-08, "loss": 0.2333, "step": 11320 }, { "epoch": 4.1, "learning_rate": 3.2764414573753094e-08, "loss": 0.2804, "step": 11330 }, { "epoch": 4.1, "learning_rate": 3.2499115670321895e-08, "loss": 0.2403, "step": 11340 }, { "epoch": 4.1, "learning_rate": 3.22338167668907e-08, "loss": 0.2754, "step": 11350 }, { "epoch": 4.1, "learning_rate": 3.19685178634595e-08, "loss": 0.3769, "step": 11360 }, { "epoch": 4.1, "learning_rate": 3.170321896002829e-08, "loss": 0.2209, "step": 11370 }, { "epoch": 4.11, "learning_rate": 3.1437920056597095e-08, "loss": 0.338, "step": 11380 }, { "epoch": 4.11, "learning_rate": 3.11726211531659e-08, "loss": 0.2673, "step": 11390 }, { "epoch": 4.11, "learning_rate": 3.09073222497347e-08, "loss": 0.2406, "step": 11400 }, { "epoch": 4.11, "learning_rate": 3.06420233463035e-08, "loss": 0.3665, "step": 11410 }, { "epoch": 4.11, "learning_rate": 3.03767244428723e-08, "loss": 0.1279, "step": 11420 }, { "epoch": 4.11, "learning_rate": 3.01114255394411e-08, "loss": 0.1948, "step": 11430 }, { "epoch": 4.11, "learning_rate": 2.9846126636009904e-08, "loss": 0.3244, "step": 11440 }, { "epoch": 4.11, "learning_rate": 2.9580827732578705e-08, "loss": 0.4653, "step": 11450 }, { "epoch": 4.11, "learning_rate": 2.9315528829147507e-08, "loss": 0.6083, "step": 11460 }, { "epoch": 4.11, "learning_rate": 2.9050229925716305e-08, "loss": 0.3703, "step": 11470 }, { "epoch": 4.11, "learning_rate": 2.8784931022285106e-08, "loss": 0.4843, "step": 11480 }, { "epoch": 4.11, "learning_rate": 2.8519632118853908e-08, "loss": 0.5002, "step": 11490 }, { "epoch": 4.11, "learning_rate": 2.8254333215422706e-08, "loss": 0.1321, "step": 11500 }, { "epoch": 4.12, "learning_rate": 2.7989034311991508e-08, "loss": 0.2233, "step": 11510 }, { "epoch": 4.12, "learning_rate": 2.7723735408560312e-08, "loss": 0.3006, "step": 11520 }, { "epoch": 4.12, "learning_rate": 2.745843650512911e-08, "loss": 0.5109, "step": 11530 }, { "epoch": 4.12, "learning_rate": 2.7193137601697912e-08, "loss": 0.5902, "step": 11540 }, { "epoch": 4.12, "learning_rate": 2.6927838698266714e-08, "loss": 0.4463, "step": 11550 }, { "epoch": 4.12, "learning_rate": 2.6662539794835512e-08, "loss": 0.1674, "step": 11560 }, { "epoch": 4.12, "learning_rate": 2.6397240891404313e-08, "loss": 0.3116, "step": 11570 }, { "epoch": 4.12, "learning_rate": 2.6131941987973115e-08, "loss": 0.5825, "step": 11580 }, { "epoch": 4.12, "learning_rate": 2.5866643084541913e-08, "loss": 0.2447, "step": 11590 }, { "epoch": 4.12, "learning_rate": 2.5601344181110718e-08, "loss": 0.5195, "step": 11600 }, { "epoch": 4.12, "learning_rate": 2.533604527767952e-08, "loss": 0.2391, "step": 11610 }, { "epoch": 4.12, "learning_rate": 2.5070746374248318e-08, "loss": 0.2846, "step": 11620 }, { "epoch": 4.13, "learning_rate": 2.480544747081712e-08, "loss": 0.1091, "step": 11630 }, { "epoch": 4.13, "learning_rate": 2.454014856738592e-08, "loss": 0.2265, "step": 11640 }, { "epoch": 4.13, "learning_rate": 2.4274849663954722e-08, "loss": 0.1529, "step": 11650 }, { "epoch": 4.13, "learning_rate": 2.400955076052352e-08, "loss": 0.1775, "step": 11660 }, { "epoch": 4.13, "learning_rate": 2.3744251857092322e-08, "loss": 0.2777, "step": 11670 }, { "epoch": 4.13, "learning_rate": 2.3478952953661127e-08, "loss": 0.1498, "step": 11680 }, { "epoch": 4.13, "learning_rate": 2.3213654050229925e-08, "loss": 0.0952, "step": 11690 }, { "epoch": 4.13, "learning_rate": 2.2948355146798726e-08, "loss": 0.2005, "step": 11700 }, { "epoch": 4.13, "learning_rate": 2.2683056243367528e-08, "loss": 0.4254, "step": 11710 }, { "epoch": 4.13, "learning_rate": 2.2417757339936326e-08, "loss": 0.5729, "step": 11720 }, { "epoch": 4.13, "learning_rate": 2.2152458436505127e-08, "loss": 0.2423, "step": 11730 }, { "epoch": 4.13, "learning_rate": 2.188715953307393e-08, "loss": 0.4267, "step": 11740 }, { "epoch": 4.13, "learning_rate": 2.1621860629642727e-08, "loss": 0.2208, "step": 11750 }, { "epoch": 4.14, "learning_rate": 2.1356561726211532e-08, "loss": 0.2322, "step": 11760 }, { "epoch": 4.14, "learning_rate": 2.1091262822780334e-08, "loss": 0.2109, "step": 11770 }, { "epoch": 4.14, "learning_rate": 2.0825963919349132e-08, "loss": 0.3221, "step": 11780 }, { "epoch": 4.14, "learning_rate": 2.0560665015917933e-08, "loss": 0.2768, "step": 11790 }, { "epoch": 4.14, "learning_rate": 2.0295366112486735e-08, "loss": 0.161, "step": 11800 }, { "epoch": 4.14, "learning_rate": 2.0030067209055533e-08, "loss": 0.4017, "step": 11810 }, { "epoch": 4.14, "learning_rate": 1.9764768305624334e-08, "loss": 0.4653, "step": 11820 }, { "epoch": 4.14, "learning_rate": 1.9499469402193136e-08, "loss": 0.3184, "step": 11830 }, { "epoch": 4.14, "learning_rate": 1.923417049876194e-08, "loss": 0.2338, "step": 11840 }, { "epoch": 4.14, "learning_rate": 1.896887159533074e-08, "loss": 0.5184, "step": 11850 }, { "epoch": 4.14, "learning_rate": 1.870357269189954e-08, "loss": 0.3949, "step": 11860 }, { "epoch": 4.14, "learning_rate": 1.843827378846834e-08, "loss": 0.2224, "step": 11870 }, { "epoch": 4.15, "learning_rate": 1.817297488503714e-08, "loss": 0.2273, "step": 11880 }, { "epoch": 4.15, "learning_rate": 1.790767598160594e-08, "loss": 0.4915, "step": 11890 }, { "epoch": 4.15, "learning_rate": 1.7642377078174743e-08, "loss": 0.2633, "step": 11900 }, { "epoch": 4.15, "learning_rate": 1.7377078174743545e-08, "loss": 0.4765, "step": 11910 }, { "epoch": 4.15, "learning_rate": 1.7111779271312346e-08, "loss": 0.3887, "step": 11920 }, { "epoch": 4.15, "learning_rate": 1.6846480367881144e-08, "loss": 0.4244, "step": 11930 }, { "epoch": 4.15, "learning_rate": 1.6581181464449946e-08, "loss": 0.3747, "step": 11940 }, { "epoch": 4.15, "learning_rate": 1.6315882561018747e-08, "loss": 0.2129, "step": 11950 }, { "epoch": 4.15, "learning_rate": 1.6050583657587546e-08, "loss": 0.2404, "step": 11960 }, { "epoch": 4.15, "learning_rate": 1.578528475415635e-08, "loss": 0.3692, "step": 11970 }, { "epoch": 4.15, "learning_rate": 1.551998585072515e-08, "loss": 0.3378, "step": 11980 }, { "epoch": 4.15, "learning_rate": 1.525468694729395e-08, "loss": 0.4417, "step": 11990 }, { "epoch": 4.15, "learning_rate": 1.498938804386275e-08, "loss": 0.3159, "step": 12000 }, { "epoch": 4.16, "learning_rate": 1.4724089140431551e-08, "loss": 0.4654, "step": 12010 }, { "epoch": 4.16, "learning_rate": 1.4458790237000355e-08, "loss": 0.2322, "step": 12020 }, { "epoch": 4.16, "learning_rate": 1.4193491333569154e-08, "loss": 0.2092, "step": 12030 }, { "epoch": 4.16, "learning_rate": 1.3928192430137954e-08, "loss": 0.2385, "step": 12040 }, { "epoch": 4.16, "learning_rate": 1.3662893526706756e-08, "loss": 0.3575, "step": 12050 }, { "epoch": 4.16, "learning_rate": 1.3397594623275557e-08, "loss": 0.2266, "step": 12060 }, { "epoch": 4.16, "learning_rate": 1.3132295719844357e-08, "loss": 0.234, "step": 12070 }, { "epoch": 4.16, "learning_rate": 1.2866996816413159e-08, "loss": 0.2282, "step": 12080 }, { "epoch": 4.16, "learning_rate": 1.2601697912981958e-08, "loss": 0.2577, "step": 12090 }, { "epoch": 4.16, "learning_rate": 1.2336399009550762e-08, "loss": 0.3597, "step": 12100 }, { "epoch": 4.16, "learning_rate": 1.2071100106119561e-08, "loss": 0.4429, "step": 12110 }, { "epoch": 4.16, "learning_rate": 1.1805801202688361e-08, "loss": 0.1702, "step": 12120 }, { "epoch": 4.17, "learning_rate": 1.1540502299257163e-08, "loss": 0.2193, "step": 12130 }, { "epoch": 4.17, "learning_rate": 1.1275203395825964e-08, "loss": 0.4391, "step": 12140 }, { "epoch": 4.17, "learning_rate": 1.1009904492394764e-08, "loss": 0.2284, "step": 12150 }, { "epoch": 4.17, "learning_rate": 1.0744605588963566e-08, "loss": 0.489, "step": 12160 }, { "epoch": 4.17, "learning_rate": 1.0479306685532365e-08, "loss": 0.1444, "step": 12170 }, { "epoch": 4.17, "learning_rate": 1.0214007782101165e-08, "loss": 0.2967, "step": 12180 }, { "epoch": 4.17, "learning_rate": 9.948708878669968e-09, "loss": 0.3594, "step": 12190 }, { "epoch": 4.17, "learning_rate": 9.683409975238768e-09, "loss": 0.5359, "step": 12200 }, { "epoch": 4.17, "learning_rate": 9.41811107180757e-09, "loss": 0.2757, "step": 12210 }, { "epoch": 4.17, "learning_rate": 9.152812168376371e-09, "loss": 0.3935, "step": 12220 }, { "epoch": 4.17, "learning_rate": 8.887513264945171e-09, "loss": 0.14, "step": 12230 }, { "epoch": 4.17, "learning_rate": 8.622214361513971e-09, "loss": 0.4049, "step": 12240 }, { "epoch": 4.17, "learning_rate": 8.356915458082773e-09, "loss": 0.2653, "step": 12250 }, { "epoch": 4.18, "learning_rate": 8.091616554651574e-09, "loss": 0.1909, "step": 12260 }, { "epoch": 4.18, "learning_rate": 7.826317651220374e-09, "loss": 0.2591, "step": 12270 }, { "epoch": 4.18, "learning_rate": 7.561018747789175e-09, "loss": 0.1013, "step": 12280 }, { "epoch": 4.18, "learning_rate": 7.295719844357976e-09, "loss": 0.1527, "step": 12290 }, { "epoch": 4.18, "learning_rate": 7.0304209409267776e-09, "loss": 0.1982, "step": 12300 }, { "epoch": 4.18, "learning_rate": 6.765122037495577e-09, "loss": 0.3196, "step": 12310 }, { "epoch": 4.18, "learning_rate": 6.499823134064379e-09, "loss": 0.2233, "step": 12320 }, { "epoch": 4.18, "learning_rate": 6.2345242306331796e-09, "loss": 0.2557, "step": 12330 }, { "epoch": 4.18, "learning_rate": 5.969225327201981e-09, "loss": 0.1804, "step": 12340 }, { "epoch": 4.18, "learning_rate": 5.703926423770781e-09, "loss": 0.2618, "step": 12350 }, { "epoch": 4.18, "learning_rate": 5.4386275203395824e-09, "loss": 0.4291, "step": 12360 }, { "epoch": 4.18, "learning_rate": 5.173328616908383e-09, "loss": 0.2799, "step": 12370 }, { "epoch": 4.18, "learning_rate": 4.908029713477184e-09, "loss": 0.1811, "step": 12380 }, { "epoch": 4.19, "learning_rate": 4.6427308100459845e-09, "loss": 0.2275, "step": 12390 }, { "epoch": 4.19, "learning_rate": 4.377431906614786e-09, "loss": 0.4325, "step": 12400 }, { "epoch": 4.19, "learning_rate": 4.112133003183587e-09, "loss": 0.22, "step": 12410 }, { "epoch": 4.19, "learning_rate": 3.846834099752387e-09, "loss": 0.22, "step": 12420 }, { "epoch": 4.19, "learning_rate": 3.5815351963211884e-09, "loss": 0.199, "step": 12430 }, { "epoch": 4.19, "learning_rate": 3.3162362928899895e-09, "loss": 0.4157, "step": 12440 }, { "epoch": 4.19, "learning_rate": 3.05093738945879e-09, "loss": 0.4612, "step": 12450 }, { "epoch": 4.19, "learning_rate": 2.7856384860275913e-09, "loss": 0.2144, "step": 12460 }, { "epoch": 4.19, "learning_rate": 2.520339582596392e-09, "loss": 0.3051, "step": 12470 }, { "epoch": 4.19, "learning_rate": 2.2550406791651926e-09, "loss": 0.4694, "step": 12480 }, { "epoch": 4.19, "learning_rate": 1.9897417757339937e-09, "loss": 0.5988, "step": 12490 }, { "epoch": 4.19, "learning_rate": 1.7244428723027944e-09, "loss": 0.1659, "step": 12500 }, { "epoch": 4.2, "learning_rate": 1.4591439688715953e-09, "loss": 0.4332, "step": 12510 }, { "epoch": 4.2, "learning_rate": 1.1938450654403961e-09, "loss": 0.2026, "step": 12520 }, { "epoch": 4.2, "learning_rate": 9.28546162009197e-10, "loss": 0.3017, "step": 12530 }, { "epoch": 4.2, "learning_rate": 6.632472585779978e-10, "loss": 0.2656, "step": 12540 }, { "epoch": 4.2, "learning_rate": 3.979483551467987e-10, "loss": 0.5015, "step": 12550 }, { "epoch": 4.2, "learning_rate": 1.3264945171559958e-10, "loss": 0.4006, "step": 12560 }, { "epoch": 4.2, "eval_accuracy": 0.8251666013328106, "eval_loss": 0.44296565651893616, "eval_runtime": 623.4351, "eval_samples_per_second": 4.092, "eval_steps_per_second": 1.023, "step": 12565 }, { "epoch": 4.2, "step": 12565, "total_flos": 2.2067541850155437e+20, "train_loss": 0.39015588782197014, "train_runtime": 24412.2348, "train_samples_per_second": 2.059, "train_steps_per_second": 0.515 }, { "epoch": 4.2, "eval_accuracy": 0.8251666013328106, "eval_loss": 0.44296565651893616, "eval_runtime": 654.7979, "eval_samples_per_second": 3.896, "eval_steps_per_second": 0.974, "step": 12565 }, { "epoch": 4.2, "eval_accuracy": 0.8251666013328106, "eval_loss": 0.44296565651893616, "eval_runtime": 750.9501, "eval_samples_per_second": 3.397, "eval_steps_per_second": 0.85, "step": 12565 } ], "logging_steps": 10, "max_steps": 12565, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 2.2067541850155437e+20, "trial_name": null, "trial_params": null }