diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,7627 @@ +{ + "best_metric": 0.8251666013328106, + "best_model_checkpoint": "videomae-large-cctv-brawl_extended_v1/checkpoint-12565", + "epoch": 4.199681655391962, + "eval_steps": 500, + "global_step": 12565, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 2.386634844868735e-09, + "loss": 0.7489, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.77326968973747e-09, + "loss": 0.724, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 7.159904534606205e-09, + "loss": 0.7649, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 9.54653937947494e-09, + "loss": 0.6728, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 1.1933174224343675e-08, + "loss": 0.7368, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 1.431980906921241e-08, + "loss": 0.7369, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 1.6706443914081144e-08, + "loss": 0.7463, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 1.909307875894988e-08, + "loss": 0.6903, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 2.1479713603818616e-08, + "loss": 0.725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 2.386634844868735e-08, + "loss": 0.7142, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 2.6252983293556084e-08, + "loss": 0.7131, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 2.863961813842482e-08, + "loss": 0.7178, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 3.102625298329355e-08, + "loss": 0.6786, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 3.341288782816229e-08, + "loss": 0.7279, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 3.579952267303102e-08, + "loss": 0.6817, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 3.818615751789976e-08, + "loss": 0.6895, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.05727923627685e-08, + "loss": 0.6714, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.295942720763723e-08, + "loss": 0.7969, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 4.5346062052505965e-08, + "loss": 0.6794, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 4.77326968973747e-08, + "loss": 0.7054, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 5.0119331742243434e-08, + "loss": 0.7599, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 5.250596658711217e-08, + "loss": 0.7148, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 5.48926014319809e-08, + "loss": 0.7105, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 5.727923627684964e-08, + "loss": 0.7208, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 5.966587112171838e-08, + "loss": 0.6222, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 6.20525059665871e-08, + "loss": 0.6742, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 6.443914081145585e-08, + "loss": 0.661, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 6.682577565632457e-08, + "loss": 0.722, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 6.921241050119332e-08, + "loss": 0.7342, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 7.159904534606204e-08, + "loss": 0.6948, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 7.398568019093078e-08, + "loss": 0.7301, + "step": 310 + }, + { + "epoch": 0.03, + "learning_rate": 7.637231503579952e-08, + "loss": 0.6596, + "step": 320 + }, + { + "epoch": 0.03, + "learning_rate": 7.875894988066825e-08, + "loss": 0.6857, + "step": 330 + }, + { + "epoch": 0.03, + "learning_rate": 8.1145584725537e-08, + "loss": 0.7129, + "step": 340 + }, + { + "epoch": 0.03, + "learning_rate": 8.353221957040572e-08, + "loss": 0.6889, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 8.591885441527446e-08, + "loss": 0.6673, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 8.830548926014319e-08, + "loss": 0.6823, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 9.069212410501193e-08, + "loss": 0.6655, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 9.307875894988066e-08, + "loss": 0.6166, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 9.54653937947494e-08, + "loss": 0.6371, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 9.785202863961813e-08, + "loss": 0.704, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 1.0023866348448687e-07, + "loss": 0.7303, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 1.0262529832935561e-07, + "loss": 0.709, + "step": 430 + }, + { + "epoch": 0.04, + "learning_rate": 1.0501193317422434e-07, + "loss": 0.6405, + "step": 440 + }, + { + "epoch": 0.04, + "learning_rate": 1.0739856801909308e-07, + "loss": 0.6122, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 1.097852028639618e-07, + "loss": 0.6458, + "step": 460 + }, + { + "epoch": 0.04, + "learning_rate": 1.1217183770883055e-07, + "loss": 0.6574, + "step": 470 + }, + { + "epoch": 0.04, + "learning_rate": 1.1455847255369927e-07, + "loss": 0.6348, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 1.1694510739856802e-07, + "loss": 0.6998, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 1.1933174224343676e-07, + "loss": 0.6245, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 1.2171837708830548e-07, + "loss": 0.6499, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 1.241050119331742e-07, + "loss": 0.5856, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 1.2649164677804294e-07, + "loss": 0.7143, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 1.288782816229117e-07, + "loss": 0.6285, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 1.3126491646778042e-07, + "loss": 0.6485, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 1.3365155131264915e-07, + "loss": 0.6322, + "step": 560 + }, + { + "epoch": 0.05, + "learning_rate": 1.3603818615751788e-07, + "loss": 0.6228, + "step": 570 + }, + { + "epoch": 0.05, + "learning_rate": 1.3842482100238663e-07, + "loss": 0.6469, + "step": 580 + }, + { + "epoch": 0.05, + "learning_rate": 1.4081145584725536e-07, + "loss": 0.6374, + "step": 590 + }, + { + "epoch": 0.05, + "learning_rate": 1.4319809069212409e-07, + "loss": 0.6418, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 1.4558472553699284e-07, + "loss": 0.6212, + "step": 610 + }, + { + "epoch": 0.05, + "learning_rate": 1.4797136038186157e-07, + "loss": 0.66, + "step": 620 + }, + { + "epoch": 0.05, + "learning_rate": 1.503579952267303e-07, + "loss": 0.5956, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 1.5274463007159905e-07, + "loss": 0.6426, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 1.5513126491646775e-07, + "loss": 0.6602, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 1.575178997613365e-07, + "loss": 0.633, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 1.5990453460620523e-07, + "loss": 0.6016, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 1.62291169451074e-07, + "loss": 0.6235, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 1.646778042959427e-07, + "loss": 0.579, + "step": 690 + }, + { + "epoch": 0.06, + "learning_rate": 1.6706443914081144e-07, + "loss": 0.5734, + "step": 700 + }, + { + "epoch": 0.06, + "learning_rate": 1.6945107398568017e-07, + "loss": 0.5904, + "step": 710 + }, + { + "epoch": 0.06, + "learning_rate": 1.7183770883054892e-07, + "loss": 0.5944, + "step": 720 + }, + { + "epoch": 0.06, + "learning_rate": 1.7422434367541765e-07, + "loss": 0.6764, + "step": 730 + }, + { + "epoch": 0.06, + "learning_rate": 1.7661097852028638e-07, + "loss": 0.5815, + "step": 740 + }, + { + "epoch": 0.06, + "learning_rate": 1.789976133651551e-07, + "loss": 0.5883, + "step": 750 + }, + { + "epoch": 0.06, + "learning_rate": 1.8138424821002386e-07, + "loss": 0.6033, + "step": 760 + }, + { + "epoch": 0.06, + "learning_rate": 1.837708830548926e-07, + "loss": 0.5302, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 1.8615751789976132e-07, + "loss": 0.5989, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 1.8854415274463004e-07, + "loss": 0.5534, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 1.909307875894988e-07, + "loss": 0.6403, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 1.9331742243436753e-07, + "loss": 0.4963, + "step": 810 + }, + { + "epoch": 0.07, + "learning_rate": 1.9570405727923625e-07, + "loss": 0.653, + "step": 820 + }, + { + "epoch": 0.07, + "learning_rate": 1.98090692124105e-07, + "loss": 0.606, + "step": 830 + }, + { + "epoch": 0.07, + "learning_rate": 2.0047732696897374e-07, + "loss": 0.6209, + "step": 840 + }, + { + "epoch": 0.07, + "learning_rate": 2.028639618138425e-07, + "loss": 0.5385, + "step": 850 + }, + { + "epoch": 0.07, + "learning_rate": 2.0525059665871122e-07, + "loss": 0.557, + "step": 860 + }, + { + "epoch": 0.07, + "learning_rate": 2.0763723150357995e-07, + "loss": 0.6703, + "step": 870 + }, + { + "epoch": 0.07, + "learning_rate": 2.1002386634844867e-07, + "loss": 0.5217, + "step": 880 + }, + { + "epoch": 0.07, + "learning_rate": 2.1241050119331743e-07, + "loss": 0.5697, + "step": 890 + }, + { + "epoch": 0.07, + "learning_rate": 2.1479713603818616e-07, + "loss": 0.5365, + "step": 900 + }, + { + "epoch": 0.07, + "learning_rate": 2.1718377088305488e-07, + "loss": 0.56, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 2.195704057279236e-07, + "loss": 0.4775, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 2.2195704057279237e-07, + "loss": 0.5328, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 2.243436754176611e-07, + "loss": 0.6135, + "step": 940 + }, + { + "epoch": 0.08, + "learning_rate": 2.2673031026252982e-07, + "loss": 0.5732, + "step": 950 + }, + { + "epoch": 0.08, + "learning_rate": 2.2911694510739855e-07, + "loss": 0.5636, + "step": 960 + }, + { + "epoch": 0.08, + "learning_rate": 2.315035799522673e-07, + "loss": 0.6081, + "step": 970 + }, + { + "epoch": 0.08, + "learning_rate": 2.3389021479713603e-07, + "loss": 0.619, + "step": 980 + }, + { + "epoch": 0.08, + "learning_rate": 2.3627684964200478e-07, + "loss": 0.5916, + "step": 990 + }, + { + "epoch": 0.08, + "learning_rate": 2.386634844868735e-07, + "loss": 0.6113, + "step": 1000 + }, + { + "epoch": 0.08, + "learning_rate": 2.4105011933174227e-07, + "loss": 0.6695, + "step": 1010 + }, + { + "epoch": 0.08, + "learning_rate": 2.4343675417661097e-07, + "loss": 0.559, + "step": 1020 + }, + { + "epoch": 0.08, + "learning_rate": 2.458233890214797e-07, + "loss": 0.5641, + "step": 1030 + }, + { + "epoch": 0.08, + "learning_rate": 2.482100238663484e-07, + "loss": 0.5845, + "step": 1040 + }, + { + "epoch": 0.08, + "learning_rate": 2.505966587112172e-07, + "loss": 0.6465, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 2.529832935560859e-07, + "loss": 0.5213, + "step": 1060 + }, + { + "epoch": 0.09, + "learning_rate": 2.5536992840095463e-07, + "loss": 0.5663, + "step": 1070 + }, + { + "epoch": 0.09, + "learning_rate": 2.577565632458234e-07, + "loss": 0.6348, + "step": 1080 + }, + { + "epoch": 0.09, + "learning_rate": 2.6014319809069214e-07, + "loss": 0.5389, + "step": 1090 + }, + { + "epoch": 0.09, + "learning_rate": 2.6252983293556084e-07, + "loss": 0.6299, + "step": 1100 + }, + { + "epoch": 0.09, + "learning_rate": 2.649164677804296e-07, + "loss": 0.5185, + "step": 1110 + }, + { + "epoch": 0.09, + "learning_rate": 2.673031026252983e-07, + "loss": 0.4967, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 2.6968973747016705e-07, + "loss": 0.5323, + "step": 1130 + }, + { + "epoch": 0.09, + "learning_rate": 2.7207637231503575e-07, + "loss": 0.4797, + "step": 1140 + }, + { + "epoch": 0.09, + "learning_rate": 2.744630071599045e-07, + "loss": 0.591, + "step": 1150 + }, + { + "epoch": 0.09, + "learning_rate": 2.7684964200477326e-07, + "loss": 0.488, + "step": 1160 + }, + { + "epoch": 0.09, + "learning_rate": 2.79236276849642e-07, + "loss": 0.4519, + "step": 1170 + }, + { + "epoch": 0.09, + "learning_rate": 2.816229116945107e-07, + "loss": 0.5839, + "step": 1180 + }, + { + "epoch": 0.09, + "learning_rate": 2.8400954653937947e-07, + "loss": 0.5682, + "step": 1190 + }, + { + "epoch": 0.1, + "learning_rate": 2.8639618138424817e-07, + "loss": 0.4765, + "step": 1200 + }, + { + "epoch": 0.1, + "learning_rate": 2.8878281622911693e-07, + "loss": 0.6187, + "step": 1210 + }, + { + "epoch": 0.1, + "learning_rate": 2.911694510739857e-07, + "loss": 0.4894, + "step": 1220 + }, + { + "epoch": 0.1, + "learning_rate": 2.935560859188544e-07, + "loss": 0.5424, + "step": 1230 + }, + { + "epoch": 0.1, + "learning_rate": 2.9594272076372314e-07, + "loss": 0.5351, + "step": 1240 + }, + { + "epoch": 0.1, + "learning_rate": 2.983293556085919e-07, + "loss": 0.5059, + "step": 1250 + }, + { + "epoch": 0.1, + "learning_rate": 2.999204103289706e-07, + "loss": 0.5178, + "step": 1260 + }, + { + "epoch": 0.1, + "learning_rate": 2.996551114255394e-07, + "loss": 0.3589, + "step": 1270 + }, + { + "epoch": 0.1, + "learning_rate": 2.9938981252210824e-07, + "loss": 0.5448, + "step": 1280 + }, + { + "epoch": 0.1, + "learning_rate": 2.9912451361867705e-07, + "loss": 0.4891, + "step": 1290 + }, + { + "epoch": 0.1, + "learning_rate": 2.988592147152458e-07, + "loss": 0.5095, + "step": 1300 + }, + { + "epoch": 0.1, + "learning_rate": 2.9859391581181463e-07, + "loss": 0.5336, + "step": 1310 + }, + { + "epoch": 0.11, + "learning_rate": 2.9832861690838345e-07, + "loss": 0.5037, + "step": 1320 + }, + { + "epoch": 0.11, + "learning_rate": 2.980633180049522e-07, + "loss": 0.4463, + "step": 1330 + }, + { + "epoch": 0.11, + "learning_rate": 2.97798019101521e-07, + "loss": 0.4545, + "step": 1340 + }, + { + "epoch": 0.11, + "learning_rate": 2.9753272019808984e-07, + "loss": 0.5409, + "step": 1350 + }, + { + "epoch": 0.11, + "learning_rate": 2.9726742129465865e-07, + "loss": 0.408, + "step": 1360 + }, + { + "epoch": 0.11, + "learning_rate": 2.9700212239122747e-07, + "loss": 0.5175, + "step": 1370 + }, + { + "epoch": 0.11, + "learning_rate": 2.9673682348779623e-07, + "loss": 0.4775, + "step": 1380 + }, + { + "epoch": 0.11, + "learning_rate": 2.9647152458436504e-07, + "loss": 0.6129, + "step": 1390 + }, + { + "epoch": 0.11, + "learning_rate": 2.9620622568093386e-07, + "loss": 0.366, + "step": 1400 + }, + { + "epoch": 0.11, + "learning_rate": 2.959409267775026e-07, + "loss": 0.4697, + "step": 1410 + }, + { + "epoch": 0.11, + "learning_rate": 2.9567562787407143e-07, + "loss": 0.4706, + "step": 1420 + }, + { + "epoch": 0.11, + "learning_rate": 2.9541032897064025e-07, + "loss": 0.4555, + "step": 1430 + }, + { + "epoch": 0.11, + "learning_rate": 2.9514503006720906e-07, + "loss": 0.5475, + "step": 1440 + }, + { + "epoch": 0.12, + "learning_rate": 2.948797311637778e-07, + "loss": 0.4358, + "step": 1450 + }, + { + "epoch": 0.12, + "learning_rate": 2.9461443226034664e-07, + "loss": 0.5115, + "step": 1460 + }, + { + "epoch": 0.12, + "learning_rate": 2.9434913335691545e-07, + "loss": 0.6776, + "step": 1470 + }, + { + "epoch": 0.12, + "learning_rate": 2.940838344534842e-07, + "loss": 0.4034, + "step": 1480 + }, + { + "epoch": 0.12, + "learning_rate": 2.938185355500531e-07, + "loss": 0.5232, + "step": 1490 + }, + { + "epoch": 0.12, + "learning_rate": 2.9355323664662185e-07, + "loss": 0.5778, + "step": 1500 + }, + { + "epoch": 0.12, + "learning_rate": 2.9328793774319066e-07, + "loss": 0.5217, + "step": 1510 + }, + { + "epoch": 0.12, + "learning_rate": 2.930226388397595e-07, + "loss": 0.5378, + "step": 1520 + }, + { + "epoch": 0.12, + "learning_rate": 2.9275733993632824e-07, + "loss": 0.5131, + "step": 1530 + }, + { + "epoch": 0.12, + "learning_rate": 2.9249204103289705e-07, + "loss": 0.4615, + "step": 1540 + }, + { + "epoch": 0.12, + "learning_rate": 2.9222674212946587e-07, + "loss": 0.5084, + "step": 1550 + }, + { + "epoch": 0.12, + "learning_rate": 2.9196144322603463e-07, + "loss": 0.5639, + "step": 1560 + }, + { + "epoch": 0.12, + "learning_rate": 2.9169614432260344e-07, + "loss": 0.6176, + "step": 1570 + }, + { + "epoch": 0.13, + "learning_rate": 2.9143084541917226e-07, + "loss": 0.4819, + "step": 1580 + }, + { + "epoch": 0.13, + "learning_rate": 2.9116554651574107e-07, + "loss": 0.6385, + "step": 1590 + }, + { + "epoch": 0.13, + "learning_rate": 2.9090024761230983e-07, + "loss": 0.5515, + "step": 1600 + }, + { + "epoch": 0.13, + "learning_rate": 2.9063494870887865e-07, + "loss": 0.5406, + "step": 1610 + }, + { + "epoch": 0.13, + "learning_rate": 2.9036964980544746e-07, + "loss": 0.5813, + "step": 1620 + }, + { + "epoch": 0.13, + "learning_rate": 2.901043509020162e-07, + "loss": 0.6192, + "step": 1630 + }, + { + "epoch": 0.13, + "learning_rate": 2.898390519985851e-07, + "loss": 0.5759, + "step": 1640 + }, + { + "epoch": 0.13, + "learning_rate": 2.8957375309515386e-07, + "loss": 0.4963, + "step": 1650 + }, + { + "epoch": 0.13, + "learning_rate": 2.8930845419172267e-07, + "loss": 0.5021, + "step": 1660 + }, + { + "epoch": 0.13, + "learning_rate": 2.890431552882915e-07, + "loss": 0.5574, + "step": 1670 + }, + { + "epoch": 0.13, + "learning_rate": 2.8877785638486025e-07, + "loss": 0.4434, + "step": 1680 + }, + { + "epoch": 0.13, + "learning_rate": 2.8851255748142906e-07, + "loss": 0.5213, + "step": 1690 + }, + { + "epoch": 0.14, + "learning_rate": 2.882472585779979e-07, + "loss": 0.461, + "step": 1700 + }, + { + "epoch": 0.14, + "learning_rate": 2.879819596745667e-07, + "loss": 0.4587, + "step": 1710 + }, + { + "epoch": 0.14, + "learning_rate": 2.8771666077113545e-07, + "loss": 0.3879, + "step": 1720 + }, + { + "epoch": 0.14, + "learning_rate": 2.8745136186770427e-07, + "loss": 0.48, + "step": 1730 + }, + { + "epoch": 0.14, + "learning_rate": 2.871860629642731e-07, + "loss": 0.6378, + "step": 1740 + }, + { + "epoch": 0.14, + "learning_rate": 2.8692076406084184e-07, + "loss": 0.5548, + "step": 1750 + }, + { + "epoch": 0.14, + "learning_rate": 2.8665546515741066e-07, + "loss": 0.4741, + "step": 1760 + }, + { + "epoch": 0.14, + "learning_rate": 2.8639016625397947e-07, + "loss": 0.4441, + "step": 1770 + }, + { + "epoch": 0.14, + "learning_rate": 2.8612486735054823e-07, + "loss": 0.4345, + "step": 1780 + }, + { + "epoch": 0.14, + "learning_rate": 2.858595684471171e-07, + "loss": 0.6156, + "step": 1790 + }, + { + "epoch": 0.14, + "learning_rate": 2.8559426954368586e-07, + "loss": 0.4104, + "step": 1800 + }, + { + "epoch": 0.14, + "learning_rate": 2.853289706402547e-07, + "loss": 0.5361, + "step": 1810 + }, + { + "epoch": 0.14, + "learning_rate": 2.850636717368235e-07, + "loss": 0.4971, + "step": 1820 + }, + { + "epoch": 0.15, + "learning_rate": 2.8479837283339226e-07, + "loss": 0.4066, + "step": 1830 + }, + { + "epoch": 0.15, + "learning_rate": 2.8453307392996107e-07, + "loss": 0.4786, + "step": 1840 + }, + { + "epoch": 0.15, + "learning_rate": 2.842677750265299e-07, + "loss": 0.4475, + "step": 1850 + }, + { + "epoch": 0.15, + "learning_rate": 2.840024761230987e-07, + "loss": 0.596, + "step": 1860 + }, + { + "epoch": 0.15, + "learning_rate": 2.8373717721966746e-07, + "loss": 0.4797, + "step": 1870 + }, + { + "epoch": 0.15, + "learning_rate": 2.834718783162363e-07, + "loss": 0.6381, + "step": 1880 + }, + { + "epoch": 0.15, + "learning_rate": 2.832065794128051e-07, + "loss": 0.5512, + "step": 1890 + }, + { + "epoch": 0.15, + "learning_rate": 2.8294128050937385e-07, + "loss": 0.526, + "step": 1900 + }, + { + "epoch": 0.15, + "learning_rate": 2.8267598160594267e-07, + "loss": 0.4208, + "step": 1910 + }, + { + "epoch": 0.15, + "learning_rate": 2.824106827025115e-07, + "loss": 0.5019, + "step": 1920 + }, + { + "epoch": 0.15, + "learning_rate": 2.821453837990803e-07, + "loss": 0.5601, + "step": 1930 + }, + { + "epoch": 0.15, + "learning_rate": 2.818800848956491e-07, + "loss": 0.4532, + "step": 1940 + }, + { + "epoch": 0.16, + "learning_rate": 2.816147859922179e-07, + "loss": 0.6079, + "step": 1950 + }, + { + "epoch": 0.16, + "learning_rate": 2.813494870887867e-07, + "loss": 0.4444, + "step": 1960 + }, + { + "epoch": 0.16, + "learning_rate": 2.810841881853555e-07, + "loss": 0.5132, + "step": 1970 + }, + { + "epoch": 0.16, + "learning_rate": 2.8081888928192426e-07, + "loss": 0.5627, + "step": 1980 + }, + { + "epoch": 0.16, + "learning_rate": 2.805535903784931e-07, + "loss": 0.4318, + "step": 1990 + }, + { + "epoch": 0.16, + "learning_rate": 2.802882914750619e-07, + "loss": 0.5026, + "step": 2000 + }, + { + "epoch": 0.16, + "learning_rate": 2.800229925716307e-07, + "loss": 0.5187, + "step": 2010 + }, + { + "epoch": 0.16, + "learning_rate": 2.7975769366819947e-07, + "loss": 0.396, + "step": 2020 + }, + { + "epoch": 0.16, + "learning_rate": 2.794923947647683e-07, + "loss": 0.4171, + "step": 2030 + }, + { + "epoch": 0.16, + "learning_rate": 2.792270958613371e-07, + "loss": 0.4792, + "step": 2040 + }, + { + "epoch": 0.16, + "learning_rate": 2.7896179695790586e-07, + "loss": 0.4526, + "step": 2050 + }, + { + "epoch": 0.16, + "learning_rate": 2.7869649805447473e-07, + "loss": 0.4952, + "step": 2060 + }, + { + "epoch": 0.16, + "learning_rate": 2.784311991510435e-07, + "loss": 0.5462, + "step": 2070 + }, + { + "epoch": 0.17, + "learning_rate": 2.781659002476123e-07, + "loss": 0.6351, + "step": 2080 + }, + { + "epoch": 0.17, + "learning_rate": 2.779006013441811e-07, + "loss": 0.5329, + "step": 2090 + }, + { + "epoch": 0.17, + "learning_rate": 2.776353024407499e-07, + "loss": 0.3761, + "step": 2100 + }, + { + "epoch": 0.17, + "learning_rate": 2.773700035373187e-07, + "loss": 0.4625, + "step": 2110 + }, + { + "epoch": 0.17, + "learning_rate": 2.771047046338875e-07, + "loss": 0.5105, + "step": 2120 + }, + { + "epoch": 0.17, + "learning_rate": 2.768394057304563e-07, + "loss": 0.4063, + "step": 2130 + }, + { + "epoch": 0.17, + "learning_rate": 2.765741068270251e-07, + "loss": 0.4644, + "step": 2140 + }, + { + "epoch": 0.17, + "learning_rate": 2.763088079235939e-07, + "loss": 0.4825, + "step": 2150 + }, + { + "epoch": 0.17, + "learning_rate": 2.760435090201627e-07, + "loss": 0.5125, + "step": 2160 + }, + { + "epoch": 0.17, + "learning_rate": 2.757782101167315e-07, + "loss": 0.5761, + "step": 2170 + }, + { + "epoch": 0.17, + "learning_rate": 2.755129112133003e-07, + "loss": 0.4859, + "step": 2180 + }, + { + "epoch": 0.17, + "learning_rate": 2.752476123098691e-07, + "loss": 0.4388, + "step": 2190 + }, + { + "epoch": 0.18, + "learning_rate": 2.7498231340643787e-07, + "loss": 0.5507, + "step": 2200 + }, + { + "epoch": 0.18, + "learning_rate": 2.7471701450300674e-07, + "loss": 0.4343, + "step": 2210 + }, + { + "epoch": 0.18, + "learning_rate": 2.744517155995755e-07, + "loss": 0.4135, + "step": 2220 + }, + { + "epoch": 0.18, + "learning_rate": 2.741864166961443e-07, + "loss": 0.4108, + "step": 2230 + }, + { + "epoch": 0.18, + "learning_rate": 2.7392111779271313e-07, + "loss": 0.4274, + "step": 2240 + }, + { + "epoch": 0.18, + "learning_rate": 2.736558188892819e-07, + "loss": 0.4261, + "step": 2250 + }, + { + "epoch": 0.18, + "learning_rate": 2.733905199858507e-07, + "loss": 0.4734, + "step": 2260 + }, + { + "epoch": 0.18, + "learning_rate": 2.731252210824195e-07, + "loss": 0.5553, + "step": 2270 + }, + { + "epoch": 0.18, + "learning_rate": 2.7285992217898834e-07, + "loss": 0.4643, + "step": 2280 + }, + { + "epoch": 0.18, + "learning_rate": 2.725946232755571e-07, + "loss": 0.458, + "step": 2290 + }, + { + "epoch": 0.18, + "learning_rate": 2.723293243721259e-07, + "loss": 0.5908, + "step": 2300 + }, + { + "epoch": 0.18, + "learning_rate": 2.7206402546869473e-07, + "loss": 0.533, + "step": 2310 + }, + { + "epoch": 0.18, + "learning_rate": 2.717987265652635e-07, + "loss": 0.6152, + "step": 2320 + }, + { + "epoch": 0.19, + "learning_rate": 2.715334276618323e-07, + "loss": 0.3958, + "step": 2330 + }, + { + "epoch": 0.19, + "learning_rate": 2.712681287584011e-07, + "loss": 0.4243, + "step": 2340 + }, + { + "epoch": 0.19, + "learning_rate": 2.710028298549699e-07, + "loss": 0.4743, + "step": 2350 + }, + { + "epoch": 0.19, + "learning_rate": 2.7073753095153875e-07, + "loss": 0.603, + "step": 2360 + }, + { + "epoch": 0.19, + "learning_rate": 2.704722320481075e-07, + "loss": 0.4256, + "step": 2370 + }, + { + "epoch": 0.19, + "learning_rate": 2.702069331446763e-07, + "loss": 0.5295, + "step": 2380 + }, + { + "epoch": 0.19, + "learning_rate": 2.6994163424124514e-07, + "loss": 0.4739, + "step": 2390 + }, + { + "epoch": 0.19, + "learning_rate": 2.696763353378139e-07, + "loss": 0.3976, + "step": 2400 + }, + { + "epoch": 0.19, + "learning_rate": 2.694110364343827e-07, + "loss": 0.5386, + "step": 2410 + }, + { + "epoch": 0.19, + "learning_rate": 2.6914573753095153e-07, + "loss": 0.5893, + "step": 2420 + }, + { + "epoch": 0.19, + "learning_rate": 2.6888043862752035e-07, + "loss": 0.4234, + "step": 2430 + }, + { + "epoch": 0.19, + "learning_rate": 2.686151397240891e-07, + "loss": 0.411, + "step": 2440 + }, + { + "epoch": 0.19, + "learning_rate": 2.683498408206579e-07, + "loss": 0.5547, + "step": 2450 + }, + { + "epoch": 0.2, + "learning_rate": 2.6808454191722674e-07, + "loss": 0.4212, + "step": 2460 + }, + { + "epoch": 0.2, + "learning_rate": 2.678192430137955e-07, + "loss": 0.3612, + "step": 2470 + }, + { + "epoch": 0.2, + "learning_rate": 2.675539441103643e-07, + "loss": 0.7204, + "step": 2480 + }, + { + "epoch": 0.2, + "learning_rate": 2.6728864520693313e-07, + "loss": 0.5413, + "step": 2490 + }, + { + "epoch": 0.2, + "learning_rate": 2.6702334630350194e-07, + "loss": 0.4005, + "step": 2500 + }, + { + "epoch": 0.2, + "learning_rate": 2.6675804740007076e-07, + "loss": 0.4193, + "step": 2510 + }, + { + "epoch": 0.2, + "eval_accuracy": 0.6985495883967072, + "eval_loss": 0.48386532068252563, + "eval_runtime": 707.7642, + "eval_samples_per_second": 3.604, + "eval_steps_per_second": 0.901, + "step": 2514 + }, + { + "epoch": 1.0, + "learning_rate": 2.664927484966395e-07, + "loss": 0.4189, + "step": 2520 + }, + { + "epoch": 1.0, + "learning_rate": 2.6622744959320833e-07, + "loss": 0.5329, + "step": 2530 + }, + { + "epoch": 1.0, + "learning_rate": 2.6596215068977715e-07, + "loss": 0.528, + "step": 2540 + }, + { + "epoch": 1.0, + "learning_rate": 2.656968517863459e-07, + "loss": 0.4498, + "step": 2550 + }, + { + "epoch": 1.0, + "learning_rate": 2.654315528829147e-07, + "loss": 0.4362, + "step": 2560 + }, + { + "epoch": 1.0, + "learning_rate": 2.6516625397948354e-07, + "loss": 0.4513, + "step": 2570 + }, + { + "epoch": 1.01, + "learning_rate": 2.6490095507605236e-07, + "loss": 0.45, + "step": 2580 + }, + { + "epoch": 1.01, + "learning_rate": 2.6463565617262117e-07, + "loss": 0.5109, + "step": 2590 + }, + { + "epoch": 1.01, + "learning_rate": 2.6437035726918993e-07, + "loss": 0.454, + "step": 2600 + }, + { + "epoch": 1.01, + "learning_rate": 2.6410505836575875e-07, + "loss": 0.5103, + "step": 2610 + }, + { + "epoch": 1.01, + "learning_rate": 2.6383975946232756e-07, + "loss": 0.521, + "step": 2620 + }, + { + "epoch": 1.01, + "learning_rate": 2.635744605588964e-07, + "loss": 0.5211, + "step": 2630 + }, + { + "epoch": 1.01, + "learning_rate": 2.6330916165546514e-07, + "loss": 0.485, + "step": 2640 + }, + { + "epoch": 1.01, + "learning_rate": 2.6304386275203395e-07, + "loss": 0.3995, + "step": 2650 + }, + { + "epoch": 1.01, + "learning_rate": 2.6277856384860277e-07, + "loss": 0.6925, + "step": 2660 + }, + { + "epoch": 1.01, + "learning_rate": 2.6251326494517153e-07, + "loss": 0.3811, + "step": 2670 + }, + { + "epoch": 1.01, + "learning_rate": 2.6224796604174034e-07, + "loss": 0.4942, + "step": 2680 + }, + { + "epoch": 1.01, + "learning_rate": 2.6198266713830916e-07, + "loss": 0.4295, + "step": 2690 + }, + { + "epoch": 1.01, + "learning_rate": 2.617173682348779e-07, + "loss": 0.6002, + "step": 2700 + }, + { + "epoch": 1.02, + "learning_rate": 2.614520693314468e-07, + "loss": 0.3757, + "step": 2710 + }, + { + "epoch": 1.02, + "learning_rate": 2.6118677042801555e-07, + "loss": 0.502, + "step": 2720 + }, + { + "epoch": 1.02, + "learning_rate": 2.6092147152458436e-07, + "loss": 0.2979, + "step": 2730 + }, + { + "epoch": 1.02, + "learning_rate": 2.606561726211532e-07, + "loss": 0.3475, + "step": 2740 + }, + { + "epoch": 1.02, + "learning_rate": 2.6039087371772194e-07, + "loss": 0.5197, + "step": 2750 + }, + { + "epoch": 1.02, + "learning_rate": 2.6012557481429076e-07, + "loss": 0.379, + "step": 2760 + }, + { + "epoch": 1.02, + "learning_rate": 2.5986027591085957e-07, + "loss": 0.4161, + "step": 2770 + }, + { + "epoch": 1.02, + "learning_rate": 2.595949770074284e-07, + "loss": 0.561, + "step": 2780 + }, + { + "epoch": 1.02, + "learning_rate": 2.5932967810399715e-07, + "loss": 0.4509, + "step": 2790 + }, + { + "epoch": 1.02, + "learning_rate": 2.5906437920056596e-07, + "loss": 0.3427, + "step": 2800 + }, + { + "epoch": 1.02, + "learning_rate": 2.587990802971348e-07, + "loss": 0.315, + "step": 2810 + }, + { + "epoch": 1.02, + "learning_rate": 2.5853378139370354e-07, + "loss": 0.5377, + "step": 2820 + }, + { + "epoch": 1.03, + "learning_rate": 2.5826848249027235e-07, + "loss": 0.4747, + "step": 2830 + }, + { + "epoch": 1.03, + "learning_rate": 2.5800318358684117e-07, + "loss": 0.604, + "step": 2840 + }, + { + "epoch": 1.03, + "learning_rate": 2.5773788468341e-07, + "loss": 0.6197, + "step": 2850 + }, + { + "epoch": 1.03, + "learning_rate": 2.574725857799788e-07, + "loss": 0.6246, + "step": 2860 + }, + { + "epoch": 1.03, + "learning_rate": 2.5720728687654756e-07, + "loss": 0.385, + "step": 2870 + }, + { + "epoch": 1.03, + "learning_rate": 2.569419879731164e-07, + "loss": 0.3885, + "step": 2880 + }, + { + "epoch": 1.03, + "learning_rate": 2.566766890696852e-07, + "loss": 0.3586, + "step": 2890 + }, + { + "epoch": 1.03, + "learning_rate": 2.5641139016625395e-07, + "loss": 0.4253, + "step": 2900 + }, + { + "epoch": 1.03, + "learning_rate": 2.5614609126282276e-07, + "loss": 0.3906, + "step": 2910 + }, + { + "epoch": 1.03, + "learning_rate": 2.558807923593916e-07, + "loss": 0.5617, + "step": 2920 + }, + { + "epoch": 1.03, + "learning_rate": 2.556154934559604e-07, + "loss": 0.5688, + "step": 2930 + }, + { + "epoch": 1.03, + "learning_rate": 2.5535019455252916e-07, + "loss": 0.5803, + "step": 2940 + }, + { + "epoch": 1.03, + "learning_rate": 2.5508489564909797e-07, + "loss": 0.4535, + "step": 2950 + }, + { + "epoch": 1.04, + "learning_rate": 2.548195967456668e-07, + "loss": 0.4301, + "step": 2960 + }, + { + "epoch": 1.04, + "learning_rate": 2.5455429784223555e-07, + "loss": 0.4844, + "step": 2970 + }, + { + "epoch": 1.04, + "learning_rate": 2.542889989388044e-07, + "loss": 0.4359, + "step": 2980 + }, + { + "epoch": 1.04, + "learning_rate": 2.540237000353732e-07, + "loss": 0.4562, + "step": 2990 + }, + { + "epoch": 1.04, + "learning_rate": 2.53758401131942e-07, + "loss": 0.3864, + "step": 3000 + }, + { + "epoch": 1.04, + "learning_rate": 2.534931022285108e-07, + "loss": 0.5046, + "step": 3010 + }, + { + "epoch": 1.04, + "learning_rate": 2.5322780332507957e-07, + "loss": 0.2887, + "step": 3020 + }, + { + "epoch": 1.04, + "learning_rate": 2.529625044216484e-07, + "loss": 0.4083, + "step": 3030 + }, + { + "epoch": 1.04, + "learning_rate": 2.526972055182172e-07, + "loss": 0.3877, + "step": 3040 + }, + { + "epoch": 1.04, + "learning_rate": 2.5243190661478596e-07, + "loss": 0.5078, + "step": 3050 + }, + { + "epoch": 1.04, + "learning_rate": 2.521666077113548e-07, + "loss": 0.4621, + "step": 3060 + }, + { + "epoch": 1.04, + "learning_rate": 2.519013088079236e-07, + "loss": 0.4411, + "step": 3070 + }, + { + "epoch": 1.05, + "learning_rate": 2.516360099044924e-07, + "loss": 0.5897, + "step": 3080 + }, + { + "epoch": 1.05, + "learning_rate": 2.5137071100106117e-07, + "loss": 0.4126, + "step": 3090 + }, + { + "epoch": 1.05, + "learning_rate": 2.5110541209763e-07, + "loss": 0.4725, + "step": 3100 + }, + { + "epoch": 1.05, + "learning_rate": 2.508401131941988e-07, + "loss": 0.4072, + "step": 3110 + }, + { + "epoch": 1.05, + "learning_rate": 2.5057481429076756e-07, + "loss": 0.5103, + "step": 3120 + }, + { + "epoch": 1.05, + "learning_rate": 2.503095153873364e-07, + "loss": 0.4725, + "step": 3130 + }, + { + "epoch": 1.05, + "learning_rate": 2.500442164839052e-07, + "loss": 0.4113, + "step": 3140 + }, + { + "epoch": 1.05, + "learning_rate": 2.49778917580474e-07, + "loss": 0.3694, + "step": 3150 + }, + { + "epoch": 1.05, + "learning_rate": 2.495136186770428e-07, + "loss": 0.4339, + "step": 3160 + }, + { + "epoch": 1.05, + "learning_rate": 2.492483197736116e-07, + "loss": 0.4924, + "step": 3170 + }, + { + "epoch": 1.05, + "learning_rate": 2.489830208701804e-07, + "loss": 0.4624, + "step": 3180 + }, + { + "epoch": 1.05, + "learning_rate": 2.487177219667492e-07, + "loss": 0.406, + "step": 3190 + }, + { + "epoch": 1.05, + "learning_rate": 2.4845242306331797e-07, + "loss": 0.4311, + "step": 3200 + }, + { + "epoch": 1.06, + "learning_rate": 2.481871241598868e-07, + "loss": 0.3505, + "step": 3210 + }, + { + "epoch": 1.06, + "learning_rate": 2.479218252564556e-07, + "loss": 0.5599, + "step": 3220 + }, + { + "epoch": 1.06, + "learning_rate": 2.476565263530244e-07, + "loss": 0.4658, + "step": 3230 + }, + { + "epoch": 1.06, + "learning_rate": 2.473912274495932e-07, + "loss": 0.3503, + "step": 3240 + }, + { + "epoch": 1.06, + "learning_rate": 2.47125928546162e-07, + "loss": 0.4346, + "step": 3250 + }, + { + "epoch": 1.06, + "learning_rate": 2.468606296427308e-07, + "loss": 0.4888, + "step": 3260 + }, + { + "epoch": 1.06, + "learning_rate": 2.4659533073929957e-07, + "loss": 0.4938, + "step": 3270 + }, + { + "epoch": 1.06, + "learning_rate": 2.4633003183586843e-07, + "loss": 0.3205, + "step": 3280 + }, + { + "epoch": 1.06, + "learning_rate": 2.460647329324372e-07, + "loss": 0.3941, + "step": 3290 + }, + { + "epoch": 1.06, + "learning_rate": 2.45799434029006e-07, + "loss": 0.447, + "step": 3300 + }, + { + "epoch": 1.06, + "learning_rate": 2.455341351255748e-07, + "loss": 0.2545, + "step": 3310 + }, + { + "epoch": 1.06, + "learning_rate": 2.452688362221436e-07, + "loss": 0.4893, + "step": 3320 + }, + { + "epoch": 1.06, + "learning_rate": 2.450035373187124e-07, + "loss": 0.4399, + "step": 3330 + }, + { + "epoch": 1.07, + "learning_rate": 2.447382384152812e-07, + "loss": 0.4247, + "step": 3340 + }, + { + "epoch": 1.07, + "learning_rate": 2.4447293951185003e-07, + "loss": 0.5176, + "step": 3350 + }, + { + "epoch": 1.07, + "learning_rate": 2.442076406084188e-07, + "loss": 0.5129, + "step": 3360 + }, + { + "epoch": 1.07, + "learning_rate": 2.439423417049876e-07, + "loss": 0.3134, + "step": 3370 + }, + { + "epoch": 1.07, + "learning_rate": 2.436770428015564e-07, + "loss": 0.4207, + "step": 3380 + }, + { + "epoch": 1.07, + "learning_rate": 2.434117438981252e-07, + "loss": 0.6662, + "step": 3390 + }, + { + "epoch": 1.07, + "learning_rate": 2.43146444994694e-07, + "loss": 0.4619, + "step": 3400 + }, + { + "epoch": 1.07, + "learning_rate": 2.428811460912628e-07, + "loss": 0.5505, + "step": 3410 + }, + { + "epoch": 1.07, + "learning_rate": 2.426158471878316e-07, + "loss": 0.5201, + "step": 3420 + }, + { + "epoch": 1.07, + "learning_rate": 2.4235054828440044e-07, + "loss": 0.3945, + "step": 3430 + }, + { + "epoch": 1.07, + "learning_rate": 2.420852493809692e-07, + "loss": 0.4216, + "step": 3440 + }, + { + "epoch": 1.07, + "learning_rate": 2.41819950477538e-07, + "loss": 0.4044, + "step": 3450 + }, + { + "epoch": 1.08, + "learning_rate": 2.4155465157410683e-07, + "loss": 0.4147, + "step": 3460 + }, + { + "epoch": 1.08, + "learning_rate": 2.412893526706756e-07, + "loss": 0.4367, + "step": 3470 + }, + { + "epoch": 1.08, + "learning_rate": 2.410240537672444e-07, + "loss": 0.4442, + "step": 3480 + }, + { + "epoch": 1.08, + "learning_rate": 2.407587548638132e-07, + "loss": 0.3516, + "step": 3490 + }, + { + "epoch": 1.08, + "learning_rate": 2.4049345596038204e-07, + "loss": 0.4012, + "step": 3500 + }, + { + "epoch": 1.08, + "learning_rate": 2.402281570569508e-07, + "loss": 0.3966, + "step": 3510 + }, + { + "epoch": 1.08, + "learning_rate": 2.399628581535196e-07, + "loss": 0.4944, + "step": 3520 + }, + { + "epoch": 1.08, + "learning_rate": 2.3969755925008843e-07, + "loss": 0.5081, + "step": 3530 + }, + { + "epoch": 1.08, + "learning_rate": 2.394322603466572e-07, + "loss": 0.5445, + "step": 3540 + }, + { + "epoch": 1.08, + "learning_rate": 2.39166961443226e-07, + "loss": 0.3361, + "step": 3550 + }, + { + "epoch": 1.08, + "learning_rate": 2.389016625397948e-07, + "loss": 0.3811, + "step": 3560 + }, + { + "epoch": 1.08, + "learning_rate": 2.3863636363636364e-07, + "loss": 0.4163, + "step": 3570 + }, + { + "epoch": 1.08, + "learning_rate": 2.3837106473293243e-07, + "loss": 0.3715, + "step": 3580 + }, + { + "epoch": 1.09, + "learning_rate": 2.3810576582950121e-07, + "loss": 0.4421, + "step": 3590 + }, + { + "epoch": 1.09, + "learning_rate": 2.3784046692607003e-07, + "loss": 0.5121, + "step": 3600 + }, + { + "epoch": 1.09, + "learning_rate": 2.3757516802263884e-07, + "loss": 0.4881, + "step": 3610 + }, + { + "epoch": 1.09, + "learning_rate": 2.3730986911920763e-07, + "loss": 0.4282, + "step": 3620 + }, + { + "epoch": 1.09, + "learning_rate": 2.3704457021577642e-07, + "loss": 0.3844, + "step": 3630 + }, + { + "epoch": 1.09, + "learning_rate": 2.3677927131234523e-07, + "loss": 0.3984, + "step": 3640 + }, + { + "epoch": 1.09, + "learning_rate": 2.3651397240891402e-07, + "loss": 0.3395, + "step": 3650 + }, + { + "epoch": 1.09, + "learning_rate": 2.362486735054828e-07, + "loss": 0.3538, + "step": 3660 + }, + { + "epoch": 1.09, + "learning_rate": 2.3598337460205165e-07, + "loss": 0.469, + "step": 3670 + }, + { + "epoch": 1.09, + "learning_rate": 2.3571807569862044e-07, + "loss": 0.4087, + "step": 3680 + }, + { + "epoch": 1.09, + "learning_rate": 2.3545277679518923e-07, + "loss": 0.5183, + "step": 3690 + }, + { + "epoch": 1.09, + "learning_rate": 2.3518747789175804e-07, + "loss": 0.4856, + "step": 3700 + }, + { + "epoch": 1.1, + "learning_rate": 2.3492217898832683e-07, + "loss": 0.3906, + "step": 3710 + }, + { + "epoch": 1.1, + "learning_rate": 2.3465688008489562e-07, + "loss": 0.4084, + "step": 3720 + }, + { + "epoch": 1.1, + "learning_rate": 2.3439158118146444e-07, + "loss": 0.4071, + "step": 3730 + }, + { + "epoch": 1.1, + "learning_rate": 2.3412628227803325e-07, + "loss": 0.4447, + "step": 3740 + }, + { + "epoch": 1.1, + "learning_rate": 2.3386098337460204e-07, + "loss": 0.4718, + "step": 3750 + }, + { + "epoch": 1.1, + "learning_rate": 2.3359568447117085e-07, + "loss": 0.4005, + "step": 3760 + }, + { + "epoch": 1.1, + "learning_rate": 2.3333038556773964e-07, + "loss": 0.4809, + "step": 3770 + }, + { + "epoch": 1.1, + "learning_rate": 2.3306508666430843e-07, + "loss": 0.3627, + "step": 3780 + }, + { + "epoch": 1.1, + "learning_rate": 2.3279978776087724e-07, + "loss": 0.4105, + "step": 3790 + }, + { + "epoch": 1.1, + "learning_rate": 2.3253448885744603e-07, + "loss": 0.4147, + "step": 3800 + }, + { + "epoch": 1.1, + "learning_rate": 2.3226918995401482e-07, + "loss": 0.5299, + "step": 3810 + }, + { + "epoch": 1.1, + "learning_rate": 2.3200389105058366e-07, + "loss": 0.4322, + "step": 3820 + }, + { + "epoch": 1.1, + "learning_rate": 2.3173859214715245e-07, + "loss": 0.3699, + "step": 3830 + }, + { + "epoch": 1.11, + "learning_rate": 2.3147329324372124e-07, + "loss": 0.3699, + "step": 3840 + }, + { + "epoch": 1.11, + "learning_rate": 2.3120799434029005e-07, + "loss": 0.2841, + "step": 3850 + }, + { + "epoch": 1.11, + "learning_rate": 2.3094269543685884e-07, + "loss": 0.5602, + "step": 3860 + }, + { + "epoch": 1.11, + "learning_rate": 2.3067739653342763e-07, + "loss": 0.3621, + "step": 3870 + }, + { + "epoch": 1.11, + "learning_rate": 2.3041209762999647e-07, + "loss": 0.3471, + "step": 3880 + }, + { + "epoch": 1.11, + "learning_rate": 2.3014679872656526e-07, + "loss": 0.3144, + "step": 3890 + }, + { + "epoch": 1.11, + "learning_rate": 2.2988149982313405e-07, + "loss": 0.3478, + "step": 3900 + }, + { + "epoch": 1.11, + "learning_rate": 2.2961620091970286e-07, + "loss": 0.3689, + "step": 3910 + }, + { + "epoch": 1.11, + "learning_rate": 2.2935090201627165e-07, + "loss": 0.4915, + "step": 3920 + }, + { + "epoch": 1.11, + "learning_rate": 2.2908560311284044e-07, + "loss": 0.5927, + "step": 3930 + }, + { + "epoch": 1.11, + "learning_rate": 2.2882030420940925e-07, + "loss": 0.3461, + "step": 3940 + }, + { + "epoch": 1.11, + "learning_rate": 2.2855500530597804e-07, + "loss": 0.4687, + "step": 3950 + }, + { + "epoch": 1.12, + "learning_rate": 2.2828970640254686e-07, + "loss": 0.3873, + "step": 3960 + }, + { + "epoch": 1.12, + "learning_rate": 2.2802440749911567e-07, + "loss": 0.4167, + "step": 3970 + }, + { + "epoch": 1.12, + "learning_rate": 2.2775910859568446e-07, + "loss": 0.369, + "step": 3980 + }, + { + "epoch": 1.12, + "learning_rate": 2.2749380969225325e-07, + "loss": 0.3097, + "step": 3990 + }, + { + "epoch": 1.12, + "learning_rate": 2.2722851078882206e-07, + "loss": 0.4325, + "step": 4000 + }, + { + "epoch": 1.12, + "learning_rate": 2.2696321188539085e-07, + "loss": 0.3702, + "step": 4010 + }, + { + "epoch": 1.12, + "learning_rate": 2.2669791298195964e-07, + "loss": 0.5312, + "step": 4020 + }, + { + "epoch": 1.12, + "learning_rate": 2.2643261407852848e-07, + "loss": 0.2592, + "step": 4030 + }, + { + "epoch": 1.12, + "learning_rate": 2.2616731517509727e-07, + "loss": 0.5263, + "step": 4040 + }, + { + "epoch": 1.12, + "learning_rate": 2.2590201627166606e-07, + "loss": 0.3491, + "step": 4050 + }, + { + "epoch": 1.12, + "learning_rate": 2.2563671736823487e-07, + "loss": 0.4487, + "step": 4060 + }, + { + "epoch": 1.12, + "learning_rate": 2.2537141846480366e-07, + "loss": 0.4003, + "step": 4070 + }, + { + "epoch": 1.12, + "learning_rate": 2.2510611956137245e-07, + "loss": 0.554, + "step": 4080 + }, + { + "epoch": 1.13, + "learning_rate": 2.248408206579413e-07, + "loss": 0.3426, + "step": 4090 + }, + { + "epoch": 1.13, + "learning_rate": 2.2457552175451008e-07, + "loss": 0.3495, + "step": 4100 + }, + { + "epoch": 1.13, + "learning_rate": 2.2431022285107887e-07, + "loss": 0.3407, + "step": 4110 + }, + { + "epoch": 1.13, + "learning_rate": 2.2404492394764768e-07, + "loss": 0.4444, + "step": 4120 + }, + { + "epoch": 1.13, + "learning_rate": 2.2377962504421647e-07, + "loss": 0.3747, + "step": 4130 + }, + { + "epoch": 1.13, + "learning_rate": 2.2351432614078526e-07, + "loss": 0.4015, + "step": 4140 + }, + { + "epoch": 1.13, + "learning_rate": 2.2324902723735407e-07, + "loss": 0.34, + "step": 4150 + }, + { + "epoch": 1.13, + "learning_rate": 2.2298372833392286e-07, + "loss": 0.4813, + "step": 4160 + }, + { + "epoch": 1.13, + "learning_rate": 2.2271842943049165e-07, + "loss": 0.4426, + "step": 4170 + }, + { + "epoch": 1.13, + "learning_rate": 2.224531305270605e-07, + "loss": 0.314, + "step": 4180 + }, + { + "epoch": 1.13, + "learning_rate": 2.2218783162362928e-07, + "loss": 0.4372, + "step": 4190 + }, + { + "epoch": 1.13, + "learning_rate": 2.2192253272019807e-07, + "loss": 0.5747, + "step": 4200 + }, + { + "epoch": 1.13, + "learning_rate": 2.2165723381676688e-07, + "loss": 0.3215, + "step": 4210 + }, + { + "epoch": 1.14, + "learning_rate": 2.2139193491333567e-07, + "loss": 0.4074, + "step": 4220 + }, + { + "epoch": 1.14, + "learning_rate": 2.2112663600990446e-07, + "loss": 0.3729, + "step": 4230 + }, + { + "epoch": 1.14, + "learning_rate": 2.208613371064733e-07, + "loss": 0.4418, + "step": 4240 + }, + { + "epoch": 1.14, + "learning_rate": 2.205960382030421e-07, + "loss": 0.4403, + "step": 4250 + }, + { + "epoch": 1.14, + "learning_rate": 2.203307392996109e-07, + "loss": 0.4159, + "step": 4260 + }, + { + "epoch": 1.14, + "learning_rate": 2.200654403961797e-07, + "loss": 0.4298, + "step": 4270 + }, + { + "epoch": 1.14, + "learning_rate": 2.1980014149274848e-07, + "loss": 0.4128, + "step": 4280 + }, + { + "epoch": 1.14, + "learning_rate": 2.195348425893173e-07, + "loss": 0.3948, + "step": 4290 + }, + { + "epoch": 1.14, + "learning_rate": 2.1926954368588608e-07, + "loss": 0.3271, + "step": 4300 + }, + { + "epoch": 1.14, + "learning_rate": 2.190042447824549e-07, + "loss": 0.4198, + "step": 4310 + }, + { + "epoch": 1.14, + "learning_rate": 2.187389458790237e-07, + "loss": 0.4266, + "step": 4320 + }, + { + "epoch": 1.14, + "learning_rate": 2.184736469755925e-07, + "loss": 0.4236, + "step": 4330 + }, + { + "epoch": 1.15, + "learning_rate": 2.182083480721613e-07, + "loss": 0.4734, + "step": 4340 + }, + { + "epoch": 1.15, + "learning_rate": 2.179430491687301e-07, + "loss": 0.4639, + "step": 4350 + }, + { + "epoch": 1.15, + "learning_rate": 2.176777502652989e-07, + "loss": 0.3681, + "step": 4360 + }, + { + "epoch": 1.15, + "learning_rate": 2.1741245136186768e-07, + "loss": 0.2595, + "step": 4370 + }, + { + "epoch": 1.15, + "learning_rate": 2.1714715245843652e-07, + "loss": 0.4727, + "step": 4380 + }, + { + "epoch": 1.15, + "learning_rate": 2.168818535550053e-07, + "loss": 0.3284, + "step": 4390 + }, + { + "epoch": 1.15, + "learning_rate": 2.166165546515741e-07, + "loss": 0.3289, + "step": 4400 + }, + { + "epoch": 1.15, + "learning_rate": 2.163512557481429e-07, + "loss": 0.4443, + "step": 4410 + }, + { + "epoch": 1.15, + "learning_rate": 2.160859568447117e-07, + "loss": 0.2787, + "step": 4420 + }, + { + "epoch": 1.15, + "learning_rate": 2.158206579412805e-07, + "loss": 0.6671, + "step": 4430 + }, + { + "epoch": 1.15, + "learning_rate": 2.1555535903784933e-07, + "loss": 0.3741, + "step": 4440 + }, + { + "epoch": 1.15, + "learning_rate": 2.1529006013441812e-07, + "loss": 0.3775, + "step": 4450 + }, + { + "epoch": 1.15, + "learning_rate": 2.150247612309869e-07, + "loss": 0.3939, + "step": 4460 + }, + { + "epoch": 1.16, + "learning_rate": 2.1475946232755572e-07, + "loss": 0.2898, + "step": 4470 + }, + { + "epoch": 1.16, + "learning_rate": 2.144941634241245e-07, + "loss": 0.535, + "step": 4480 + }, + { + "epoch": 1.16, + "learning_rate": 2.142288645206933e-07, + "loss": 0.3356, + "step": 4490 + }, + { + "epoch": 1.16, + "learning_rate": 2.139635656172621e-07, + "loss": 0.4034, + "step": 4500 + }, + { + "epoch": 1.16, + "learning_rate": 2.136982667138309e-07, + "loss": 0.3448, + "step": 4510 + }, + { + "epoch": 1.16, + "learning_rate": 2.134329678103997e-07, + "loss": 0.4358, + "step": 4520 + }, + { + "epoch": 1.16, + "learning_rate": 2.1316766890696853e-07, + "loss": 0.2735, + "step": 4530 + }, + { + "epoch": 1.16, + "learning_rate": 2.1290237000353732e-07, + "loss": 0.4325, + "step": 4540 + }, + { + "epoch": 1.16, + "learning_rate": 2.126370711001061e-07, + "loss": 0.3883, + "step": 4550 + }, + { + "epoch": 1.16, + "learning_rate": 2.1237177219667492e-07, + "loss": 0.3338, + "step": 4560 + }, + { + "epoch": 1.16, + "learning_rate": 2.121064732932437e-07, + "loss": 0.3989, + "step": 4570 + }, + { + "epoch": 1.16, + "learning_rate": 2.118411743898125e-07, + "loss": 0.3722, + "step": 4580 + }, + { + "epoch": 1.17, + "learning_rate": 2.1157587548638134e-07, + "loss": 0.3492, + "step": 4590 + }, + { + "epoch": 1.17, + "learning_rate": 2.1131057658295013e-07, + "loss": 0.3775, + "step": 4600 + }, + { + "epoch": 1.17, + "learning_rate": 2.1104527767951891e-07, + "loss": 0.3562, + "step": 4610 + }, + { + "epoch": 1.17, + "learning_rate": 2.1077997877608773e-07, + "loss": 0.6652, + "step": 4620 + }, + { + "epoch": 1.17, + "learning_rate": 2.1051467987265652e-07, + "loss": 0.3927, + "step": 4630 + }, + { + "epoch": 1.17, + "learning_rate": 2.102493809692253e-07, + "loss": 0.3628, + "step": 4640 + }, + { + "epoch": 1.17, + "learning_rate": 2.0998408206579412e-07, + "loss": 0.5185, + "step": 4650 + }, + { + "epoch": 1.17, + "learning_rate": 2.0971878316236294e-07, + "loss": 0.3643, + "step": 4660 + }, + { + "epoch": 1.17, + "learning_rate": 2.0945348425893172e-07, + "loss": 0.4946, + "step": 4670 + }, + { + "epoch": 1.17, + "learning_rate": 2.0918818535550054e-07, + "loss": 0.4316, + "step": 4680 + }, + { + "epoch": 1.17, + "learning_rate": 2.0892288645206933e-07, + "loss": 0.3947, + "step": 4690 + }, + { + "epoch": 1.17, + "learning_rate": 2.0865758754863811e-07, + "loss": 0.3337, + "step": 4700 + }, + { + "epoch": 1.17, + "learning_rate": 2.0839228864520693e-07, + "loss": 0.3022, + "step": 4710 + }, + { + "epoch": 1.18, + "learning_rate": 2.0812698974177572e-07, + "loss": 0.3628, + "step": 4720 + }, + { + "epoch": 1.18, + "learning_rate": 2.078616908383445e-07, + "loss": 0.3703, + "step": 4730 + }, + { + "epoch": 1.18, + "learning_rate": 2.0759639193491335e-07, + "loss": 0.275, + "step": 4740 + }, + { + "epoch": 1.18, + "learning_rate": 2.0733109303148214e-07, + "loss": 0.3861, + "step": 4750 + }, + { + "epoch": 1.18, + "learning_rate": 2.0706579412805092e-07, + "loss": 0.416, + "step": 4760 + }, + { + "epoch": 1.18, + "learning_rate": 2.0680049522461974e-07, + "loss": 0.3163, + "step": 4770 + }, + { + "epoch": 1.18, + "learning_rate": 2.0653519632118853e-07, + "loss": 0.371, + "step": 4780 + }, + { + "epoch": 1.18, + "learning_rate": 2.0626989741775732e-07, + "loss": 0.2409, + "step": 4790 + }, + { + "epoch": 1.18, + "learning_rate": 2.0600459851432616e-07, + "loss": 0.5185, + "step": 4800 + }, + { + "epoch": 1.18, + "learning_rate": 2.0573929961089494e-07, + "loss": 0.3575, + "step": 4810 + }, + { + "epoch": 1.18, + "learning_rate": 2.0547400070746373e-07, + "loss": 0.5805, + "step": 4820 + }, + { + "epoch": 1.18, + "learning_rate": 2.0520870180403255e-07, + "loss": 0.3353, + "step": 4830 + }, + { + "epoch": 1.19, + "learning_rate": 2.0494340290060134e-07, + "loss": 0.3381, + "step": 4840 + }, + { + "epoch": 1.19, + "learning_rate": 2.0467810399717012e-07, + "loss": 0.3048, + "step": 4850 + }, + { + "epoch": 1.19, + "learning_rate": 2.0441280509373894e-07, + "loss": 0.3347, + "step": 4860 + }, + { + "epoch": 1.19, + "learning_rate": 2.0414750619030773e-07, + "loss": 0.3736, + "step": 4870 + }, + { + "epoch": 1.19, + "learning_rate": 2.0388220728687654e-07, + "loss": 0.3378, + "step": 4880 + }, + { + "epoch": 1.19, + "learning_rate": 2.0361690838344536e-07, + "loss": 0.3411, + "step": 4890 + }, + { + "epoch": 1.19, + "learning_rate": 2.0335160948001414e-07, + "loss": 0.2934, + "step": 4900 + }, + { + "epoch": 1.19, + "learning_rate": 2.0308631057658293e-07, + "loss": 0.4118, + "step": 4910 + }, + { + "epoch": 1.19, + "learning_rate": 2.0282101167315175e-07, + "loss": 0.3951, + "step": 4920 + }, + { + "epoch": 1.19, + "learning_rate": 2.0255571276972054e-07, + "loss": 0.322, + "step": 4930 + }, + { + "epoch": 1.19, + "learning_rate": 2.0229041386628932e-07, + "loss": 0.3692, + "step": 4940 + }, + { + "epoch": 1.19, + "learning_rate": 2.0202511496285817e-07, + "loss": 0.3782, + "step": 4950 + }, + { + "epoch": 1.19, + "learning_rate": 2.0175981605942695e-07, + "loss": 0.4777, + "step": 4960 + }, + { + "epoch": 1.2, + "learning_rate": 2.0149451715599574e-07, + "loss": 0.421, + "step": 4970 + }, + { + "epoch": 1.2, + "learning_rate": 2.0122921825256456e-07, + "loss": 0.3586, + "step": 4980 + }, + { + "epoch": 1.2, + "learning_rate": 2.0096391934913335e-07, + "loss": 0.3617, + "step": 4990 + }, + { + "epoch": 1.2, + "learning_rate": 2.0069862044570213e-07, + "loss": 0.2693, + "step": 5000 + }, + { + "epoch": 1.2, + "learning_rate": 2.0043332154227095e-07, + "loss": 0.2447, + "step": 5010 + }, + { + "epoch": 1.2, + "learning_rate": 2.0016802263883976e-07, + "loss": 0.3542, + "step": 5020 + }, + { + "epoch": 1.2, + "eval_accuracy": 0.7593100744805958, + "eval_loss": 0.44453132152557373, + "eval_runtime": 710.6654, + "eval_samples_per_second": 3.59, + "eval_steps_per_second": 0.898, + "step": 5028 + }, + { + "epoch": 2.0, + "learning_rate": 1.9990272373540855e-07, + "loss": 0.3873, + "step": 5030 + }, + { + "epoch": 2.0, + "learning_rate": 1.9963742483197737e-07, + "loss": 0.3811, + "step": 5040 + }, + { + "epoch": 2.0, + "learning_rate": 1.9937212592854615e-07, + "loss": 0.2772, + "step": 5050 + }, + { + "epoch": 2.0, + "learning_rate": 1.9910682702511494e-07, + "loss": 0.4806, + "step": 5060 + }, + { + "epoch": 2.0, + "learning_rate": 1.9884152812168376e-07, + "loss": 0.3124, + "step": 5070 + }, + { + "epoch": 2.0, + "learning_rate": 1.9857622921825255e-07, + "loss": 0.247, + "step": 5080 + }, + { + "epoch": 2.0, + "learning_rate": 1.9831093031482133e-07, + "loss": 0.3793, + "step": 5090 + }, + { + "epoch": 2.01, + "learning_rate": 1.9804563141139017e-07, + "loss": 0.5119, + "step": 5100 + }, + { + "epoch": 2.01, + "learning_rate": 1.9778033250795896e-07, + "loss": 0.2919, + "step": 5110 + }, + { + "epoch": 2.01, + "learning_rate": 1.9751503360452775e-07, + "loss": 0.3725, + "step": 5120 + }, + { + "epoch": 2.01, + "learning_rate": 1.9724973470109657e-07, + "loss": 0.4659, + "step": 5130 + }, + { + "epoch": 2.01, + "learning_rate": 1.9698443579766535e-07, + "loss": 0.3603, + "step": 5140 + }, + { + "epoch": 2.01, + "learning_rate": 1.9671913689423414e-07, + "loss": 0.3629, + "step": 5150 + }, + { + "epoch": 2.01, + "learning_rate": 1.9645383799080298e-07, + "loss": 0.4292, + "step": 5160 + }, + { + "epoch": 2.01, + "learning_rate": 1.9618853908737177e-07, + "loss": 0.3256, + "step": 5170 + }, + { + "epoch": 2.01, + "learning_rate": 1.9592324018394056e-07, + "loss": 0.2833, + "step": 5180 + }, + { + "epoch": 2.01, + "learning_rate": 1.9565794128050938e-07, + "loss": 0.47, + "step": 5190 + }, + { + "epoch": 2.01, + "learning_rate": 1.9539264237707816e-07, + "loss": 0.2898, + "step": 5200 + }, + { + "epoch": 2.01, + "learning_rate": 1.9512734347364695e-07, + "loss": 0.2773, + "step": 5210 + }, + { + "epoch": 2.02, + "learning_rate": 1.9486204457021577e-07, + "loss": 0.4663, + "step": 5220 + }, + { + "epoch": 2.02, + "learning_rate": 1.9459674566678455e-07, + "loss": 0.4463, + "step": 5230 + }, + { + "epoch": 2.02, + "learning_rate": 1.9433144676335337e-07, + "loss": 0.306, + "step": 5240 + }, + { + "epoch": 2.02, + "learning_rate": 1.9406614785992218e-07, + "loss": 0.5645, + "step": 5250 + }, + { + "epoch": 2.02, + "learning_rate": 1.9380084895649097e-07, + "loss": 0.3439, + "step": 5260 + }, + { + "epoch": 2.02, + "learning_rate": 1.9353555005305976e-07, + "loss": 0.311, + "step": 5270 + }, + { + "epoch": 2.02, + "learning_rate": 1.9327025114962858e-07, + "loss": 0.4669, + "step": 5280 + }, + { + "epoch": 2.02, + "learning_rate": 1.9300495224619736e-07, + "loss": 0.2812, + "step": 5290 + }, + { + "epoch": 2.02, + "learning_rate": 1.9273965334276615e-07, + "loss": 0.3005, + "step": 5300 + }, + { + "epoch": 2.02, + "learning_rate": 1.92474354439335e-07, + "loss": 0.3576, + "step": 5310 + }, + { + "epoch": 2.02, + "learning_rate": 1.9220905553590378e-07, + "loss": 0.2867, + "step": 5320 + }, + { + "epoch": 2.02, + "learning_rate": 1.9194375663247257e-07, + "loss": 0.2798, + "step": 5330 + }, + { + "epoch": 2.02, + "learning_rate": 1.9167845772904138e-07, + "loss": 0.3237, + "step": 5340 + }, + { + "epoch": 2.03, + "learning_rate": 1.9141315882561017e-07, + "loss": 0.3129, + "step": 5350 + }, + { + "epoch": 2.03, + "learning_rate": 1.9114785992217896e-07, + "loss": 0.3229, + "step": 5360 + }, + { + "epoch": 2.03, + "learning_rate": 1.908825610187478e-07, + "loss": 0.4269, + "step": 5370 + }, + { + "epoch": 2.03, + "learning_rate": 1.906172621153166e-07, + "loss": 0.4231, + "step": 5380 + }, + { + "epoch": 2.03, + "learning_rate": 1.9035196321188538e-07, + "loss": 0.3866, + "step": 5390 + }, + { + "epoch": 2.03, + "learning_rate": 1.900866643084542e-07, + "loss": 0.2466, + "step": 5400 + }, + { + "epoch": 2.03, + "learning_rate": 1.8982136540502298e-07, + "loss": 0.4208, + "step": 5410 + }, + { + "epoch": 2.03, + "learning_rate": 1.8955606650159177e-07, + "loss": 0.3165, + "step": 5420 + }, + { + "epoch": 2.03, + "learning_rate": 1.8929076759816058e-07, + "loss": 0.3127, + "step": 5430 + }, + { + "epoch": 2.03, + "learning_rate": 1.8902546869472937e-07, + "loss": 0.3945, + "step": 5440 + }, + { + "epoch": 2.03, + "learning_rate": 1.8876016979129816e-07, + "loss": 0.381, + "step": 5450 + }, + { + "epoch": 2.03, + "learning_rate": 1.88494870887867e-07, + "loss": 0.4172, + "step": 5460 + }, + { + "epoch": 2.04, + "learning_rate": 1.882295719844358e-07, + "loss": 0.461, + "step": 5470 + }, + { + "epoch": 2.04, + "learning_rate": 1.8796427308100458e-07, + "loss": 0.46, + "step": 5480 + }, + { + "epoch": 2.04, + "learning_rate": 1.876989741775734e-07, + "loss": 0.2381, + "step": 5490 + }, + { + "epoch": 2.04, + "learning_rate": 1.8743367527414218e-07, + "loss": 0.4672, + "step": 5500 + }, + { + "epoch": 2.04, + "learning_rate": 1.8716837637071097e-07, + "loss": 0.2909, + "step": 5510 + }, + { + "epoch": 2.04, + "learning_rate": 1.869030774672798e-07, + "loss": 0.4985, + "step": 5520 + }, + { + "epoch": 2.04, + "learning_rate": 1.866377785638486e-07, + "loss": 0.3446, + "step": 5530 + }, + { + "epoch": 2.04, + "learning_rate": 1.863724796604174e-07, + "loss": 0.3878, + "step": 5540 + }, + { + "epoch": 2.04, + "learning_rate": 1.861071807569862e-07, + "loss": 0.2926, + "step": 5550 + }, + { + "epoch": 2.04, + "learning_rate": 1.85841881853555e-07, + "loss": 0.2187, + "step": 5560 + }, + { + "epoch": 2.04, + "learning_rate": 1.8557658295012378e-07, + "loss": 0.338, + "step": 5570 + }, + { + "epoch": 2.04, + "learning_rate": 1.853112840466926e-07, + "loss": 0.2448, + "step": 5580 + }, + { + "epoch": 2.04, + "learning_rate": 1.850459851432614e-07, + "loss": 0.2129, + "step": 5590 + }, + { + "epoch": 2.05, + "learning_rate": 1.847806862398302e-07, + "loss": 0.3554, + "step": 5600 + }, + { + "epoch": 2.05, + "learning_rate": 1.84515387336399e-07, + "loss": 0.444, + "step": 5610 + }, + { + "epoch": 2.05, + "learning_rate": 1.842500884329678e-07, + "loss": 0.306, + "step": 5620 + }, + { + "epoch": 2.05, + "learning_rate": 1.839847895295366e-07, + "loss": 0.4596, + "step": 5630 + }, + { + "epoch": 2.05, + "learning_rate": 1.837194906261054e-07, + "loss": 0.4518, + "step": 5640 + }, + { + "epoch": 2.05, + "learning_rate": 1.834541917226742e-07, + "loss": 0.2654, + "step": 5650 + }, + { + "epoch": 2.05, + "learning_rate": 1.8318889281924298e-07, + "loss": 0.2054, + "step": 5660 + }, + { + "epoch": 2.05, + "learning_rate": 1.8292359391581182e-07, + "loss": 0.2483, + "step": 5670 + }, + { + "epoch": 2.05, + "learning_rate": 1.826582950123806e-07, + "loss": 0.3451, + "step": 5680 + }, + { + "epoch": 2.05, + "learning_rate": 1.823929961089494e-07, + "loss": 0.3117, + "step": 5690 + }, + { + "epoch": 2.05, + "learning_rate": 1.821276972055182e-07, + "loss": 0.2829, + "step": 5700 + }, + { + "epoch": 2.05, + "learning_rate": 1.81862398302087e-07, + "loss": 0.3713, + "step": 5710 + }, + { + "epoch": 2.06, + "learning_rate": 1.815970993986558e-07, + "loss": 0.2603, + "step": 5720 + }, + { + "epoch": 2.06, + "learning_rate": 1.8133180049522463e-07, + "loss": 0.2378, + "step": 5730 + }, + { + "epoch": 2.06, + "learning_rate": 1.8106650159179342e-07, + "loss": 0.4374, + "step": 5740 + }, + { + "epoch": 2.06, + "learning_rate": 1.808012026883622e-07, + "loss": 0.4291, + "step": 5750 + }, + { + "epoch": 2.06, + "learning_rate": 1.8053590378493102e-07, + "loss": 0.4139, + "step": 5760 + }, + { + "epoch": 2.06, + "learning_rate": 1.802706048814998e-07, + "loss": 0.385, + "step": 5770 + }, + { + "epoch": 2.06, + "learning_rate": 1.800053059780686e-07, + "loss": 0.3487, + "step": 5780 + }, + { + "epoch": 2.06, + "learning_rate": 1.797400070746374e-07, + "loss": 0.316, + "step": 5790 + }, + { + "epoch": 2.06, + "learning_rate": 1.794747081712062e-07, + "loss": 0.3861, + "step": 5800 + }, + { + "epoch": 2.06, + "learning_rate": 1.7920940926777502e-07, + "loss": 0.3524, + "step": 5810 + }, + { + "epoch": 2.06, + "learning_rate": 1.7894411036434383e-07, + "loss": 0.2247, + "step": 5820 + }, + { + "epoch": 2.06, + "learning_rate": 1.7867881146091262e-07, + "loss": 0.3113, + "step": 5830 + }, + { + "epoch": 2.06, + "learning_rate": 1.784135125574814e-07, + "loss": 0.5685, + "step": 5840 + }, + { + "epoch": 2.07, + "learning_rate": 1.7814821365405022e-07, + "loss": 0.4044, + "step": 5850 + }, + { + "epoch": 2.07, + "learning_rate": 1.77882914750619e-07, + "loss": 0.3159, + "step": 5860 + }, + { + "epoch": 2.07, + "learning_rate": 1.776176158471878e-07, + "loss": 0.372, + "step": 5870 + }, + { + "epoch": 2.07, + "learning_rate": 1.7735231694375664e-07, + "loss": 0.2586, + "step": 5880 + }, + { + "epoch": 2.07, + "learning_rate": 1.7708701804032543e-07, + "loss": 0.3496, + "step": 5890 + }, + { + "epoch": 2.07, + "learning_rate": 1.7682171913689422e-07, + "loss": 0.3252, + "step": 5900 + }, + { + "epoch": 2.07, + "learning_rate": 1.7655642023346303e-07, + "loss": 0.2223, + "step": 5910 + }, + { + "epoch": 2.07, + "learning_rate": 1.7629112133003182e-07, + "loss": 0.3757, + "step": 5920 + }, + { + "epoch": 2.07, + "learning_rate": 1.7602582242660063e-07, + "loss": 0.3672, + "step": 5930 + }, + { + "epoch": 2.07, + "learning_rate": 1.7576052352316945e-07, + "loss": 0.2711, + "step": 5940 + }, + { + "epoch": 2.07, + "learning_rate": 1.7549522461973824e-07, + "loss": 0.3355, + "step": 5950 + }, + { + "epoch": 2.07, + "learning_rate": 1.7522992571630705e-07, + "loss": 0.3543, + "step": 5960 + }, + { + "epoch": 2.07, + "learning_rate": 1.7496462681287584e-07, + "loss": 0.3793, + "step": 5970 + }, + { + "epoch": 2.08, + "learning_rate": 1.7469932790944463e-07, + "loss": 0.5126, + "step": 5980 + }, + { + "epoch": 2.08, + "learning_rate": 1.7443402900601344e-07, + "loss": 0.2448, + "step": 5990 + }, + { + "epoch": 2.08, + "learning_rate": 1.7416873010258223e-07, + "loss": 0.2939, + "step": 6000 + }, + { + "epoch": 2.08, + "learning_rate": 1.7390343119915102e-07, + "loss": 0.246, + "step": 6010 + }, + { + "epoch": 2.08, + "learning_rate": 1.7363813229571986e-07, + "loss": 0.4639, + "step": 6020 + }, + { + "epoch": 2.08, + "learning_rate": 1.7337283339228865e-07, + "loss": 0.3554, + "step": 6030 + }, + { + "epoch": 2.08, + "learning_rate": 1.7310753448885744e-07, + "loss": 0.5248, + "step": 6040 + }, + { + "epoch": 2.08, + "learning_rate": 1.7284223558542625e-07, + "loss": 0.2787, + "step": 6050 + }, + { + "epoch": 2.08, + "learning_rate": 1.7257693668199504e-07, + "loss": 0.3388, + "step": 6060 + }, + { + "epoch": 2.08, + "learning_rate": 1.7231163777856383e-07, + "loss": 0.306, + "step": 6070 + }, + { + "epoch": 2.08, + "learning_rate": 1.7204633887513267e-07, + "loss": 0.3312, + "step": 6080 + }, + { + "epoch": 2.08, + "learning_rate": 1.7178103997170146e-07, + "loss": 0.3058, + "step": 6090 + }, + { + "epoch": 2.09, + "learning_rate": 1.7151574106827025e-07, + "loss": 0.6773, + "step": 6100 + }, + { + "epoch": 2.09, + "learning_rate": 1.7125044216483906e-07, + "loss": 0.2879, + "step": 6110 + }, + { + "epoch": 2.09, + "learning_rate": 1.7098514326140785e-07, + "loss": 0.3534, + "step": 6120 + }, + { + "epoch": 2.09, + "learning_rate": 1.7071984435797664e-07, + "loss": 0.3959, + "step": 6130 + }, + { + "epoch": 2.09, + "learning_rate": 1.7045454545454545e-07, + "loss": 0.4433, + "step": 6140 + }, + { + "epoch": 2.09, + "learning_rate": 1.7018924655111424e-07, + "loss": 0.3306, + "step": 6150 + }, + { + "epoch": 2.09, + "learning_rate": 1.6992394764768303e-07, + "loss": 0.2876, + "step": 6160 + }, + { + "epoch": 2.09, + "learning_rate": 1.6965864874425187e-07, + "loss": 0.2279, + "step": 6170 + }, + { + "epoch": 2.09, + "learning_rate": 1.6939334984082066e-07, + "loss": 0.47, + "step": 6180 + }, + { + "epoch": 2.09, + "learning_rate": 1.6912805093738945e-07, + "loss": 0.5794, + "step": 6190 + }, + { + "epoch": 2.09, + "learning_rate": 1.6886275203395826e-07, + "loss": 0.3706, + "step": 6200 + }, + { + "epoch": 2.09, + "learning_rate": 1.6859745313052705e-07, + "loss": 0.3674, + "step": 6210 + }, + { + "epoch": 2.09, + "learning_rate": 1.6833215422709584e-07, + "loss": 0.4053, + "step": 6220 + }, + { + "epoch": 2.1, + "learning_rate": 1.6806685532366468e-07, + "loss": 0.3724, + "step": 6230 + }, + { + "epoch": 2.1, + "learning_rate": 1.6780155642023347e-07, + "loss": 0.4831, + "step": 6240 + }, + { + "epoch": 2.1, + "learning_rate": 1.6753625751680225e-07, + "loss": 0.1894, + "step": 6250 + }, + { + "epoch": 2.1, + "learning_rate": 1.6727095861337107e-07, + "loss": 0.4633, + "step": 6260 + }, + { + "epoch": 2.1, + "learning_rate": 1.6700565970993986e-07, + "loss": 0.2882, + "step": 6270 + }, + { + "epoch": 2.1, + "learning_rate": 1.6674036080650865e-07, + "loss": 0.4873, + "step": 6280 + }, + { + "epoch": 2.1, + "learning_rate": 1.6647506190307746e-07, + "loss": 0.4315, + "step": 6290 + }, + { + "epoch": 2.1, + "learning_rate": 1.6620976299964628e-07, + "loss": 0.4249, + "step": 6300 + }, + { + "epoch": 2.1, + "learning_rate": 1.6594446409621506e-07, + "loss": 0.3353, + "step": 6310 + }, + { + "epoch": 2.1, + "learning_rate": 1.6567916519278388e-07, + "loss": 0.7332, + "step": 6320 + }, + { + "epoch": 2.1, + "learning_rate": 1.6541386628935267e-07, + "loss": 0.1961, + "step": 6330 + }, + { + "epoch": 2.1, + "learning_rate": 1.6514856738592146e-07, + "loss": 0.3474, + "step": 6340 + }, + { + "epoch": 2.11, + "learning_rate": 1.6488326848249027e-07, + "loss": 0.2356, + "step": 6350 + }, + { + "epoch": 2.11, + "learning_rate": 1.6461796957905906e-07, + "loss": 0.3085, + "step": 6360 + }, + { + "epoch": 2.11, + "learning_rate": 1.6435267067562785e-07, + "loss": 0.3192, + "step": 6370 + }, + { + "epoch": 2.11, + "learning_rate": 1.640873717721967e-07, + "loss": 0.2222, + "step": 6380 + }, + { + "epoch": 2.11, + "learning_rate": 1.6382207286876548e-07, + "loss": 0.4065, + "step": 6390 + }, + { + "epoch": 2.11, + "learning_rate": 1.6355677396533426e-07, + "loss": 0.4045, + "step": 6400 + }, + { + "epoch": 2.11, + "learning_rate": 1.6329147506190308e-07, + "loss": 0.3183, + "step": 6410 + }, + { + "epoch": 2.11, + "learning_rate": 1.6302617615847187e-07, + "loss": 0.1826, + "step": 6420 + }, + { + "epoch": 2.11, + "learning_rate": 1.6276087725504066e-07, + "loss": 0.3059, + "step": 6430 + }, + { + "epoch": 2.11, + "learning_rate": 1.624955783516095e-07, + "loss": 0.2374, + "step": 6440 + }, + { + "epoch": 2.11, + "learning_rate": 1.6223027944817828e-07, + "loss": 0.3193, + "step": 6450 + }, + { + "epoch": 2.11, + "learning_rate": 1.6196498054474707e-07, + "loss": 0.2761, + "step": 6460 + }, + { + "epoch": 2.11, + "learning_rate": 1.616996816413159e-07, + "loss": 0.3633, + "step": 6470 + }, + { + "epoch": 2.12, + "learning_rate": 1.6143438273788468e-07, + "loss": 0.3956, + "step": 6480 + }, + { + "epoch": 2.12, + "learning_rate": 1.6116908383445346e-07, + "loss": 0.2156, + "step": 6490 + }, + { + "epoch": 2.12, + "learning_rate": 1.6090378493102228e-07, + "loss": 0.3898, + "step": 6500 + }, + { + "epoch": 2.12, + "learning_rate": 1.6063848602759107e-07, + "loss": 0.2793, + "step": 6510 + }, + { + "epoch": 2.12, + "learning_rate": 1.6037318712415988e-07, + "loss": 0.3232, + "step": 6520 + }, + { + "epoch": 2.12, + "learning_rate": 1.601078882207287e-07, + "loss": 0.2486, + "step": 6530 + }, + { + "epoch": 2.12, + "learning_rate": 1.5984258931729749e-07, + "loss": 0.447, + "step": 6540 + }, + { + "epoch": 2.12, + "learning_rate": 1.5957729041386627e-07, + "loss": 0.368, + "step": 6550 + }, + { + "epoch": 2.12, + "learning_rate": 1.593119915104351e-07, + "loss": 0.3826, + "step": 6560 + }, + { + "epoch": 2.12, + "learning_rate": 1.5904669260700388e-07, + "loss": 0.2456, + "step": 6570 + }, + { + "epoch": 2.12, + "learning_rate": 1.5878139370357266e-07, + "loss": 0.3172, + "step": 6580 + }, + { + "epoch": 2.12, + "learning_rate": 1.585160948001415e-07, + "loss": 0.2154, + "step": 6590 + }, + { + "epoch": 2.13, + "learning_rate": 1.582507958967103e-07, + "loss": 0.3329, + "step": 6600 + }, + { + "epoch": 2.13, + "learning_rate": 1.5798549699327908e-07, + "loss": 0.266, + "step": 6610 + }, + { + "epoch": 2.13, + "learning_rate": 1.577201980898479e-07, + "loss": 0.3691, + "step": 6620 + }, + { + "epoch": 2.13, + "learning_rate": 1.5745489918641669e-07, + "loss": 0.2765, + "step": 6630 + }, + { + "epoch": 2.13, + "learning_rate": 1.5718960028298547e-07, + "loss": 0.2228, + "step": 6640 + }, + { + "epoch": 2.13, + "learning_rate": 1.5692430137955432e-07, + "loss": 0.1927, + "step": 6650 + }, + { + "epoch": 2.13, + "learning_rate": 1.566590024761231e-07, + "loss": 0.4745, + "step": 6660 + }, + { + "epoch": 2.13, + "learning_rate": 1.563937035726919e-07, + "loss": 0.1534, + "step": 6670 + }, + { + "epoch": 2.13, + "learning_rate": 1.561284046692607e-07, + "loss": 0.1242, + "step": 6680 + }, + { + "epoch": 2.13, + "learning_rate": 1.558631057658295e-07, + "loss": 0.4647, + "step": 6690 + }, + { + "epoch": 2.13, + "learning_rate": 1.5559780686239828e-07, + "loss": 0.5586, + "step": 6700 + }, + { + "epoch": 2.13, + "learning_rate": 1.553325079589671e-07, + "loss": 0.4222, + "step": 6710 + }, + { + "epoch": 2.13, + "learning_rate": 1.5506720905553589e-07, + "loss": 0.206, + "step": 6720 + }, + { + "epoch": 2.14, + "learning_rate": 1.5480191015210467e-07, + "loss": 0.3112, + "step": 6730 + }, + { + "epoch": 2.14, + "learning_rate": 1.5453661124867352e-07, + "loss": 0.4122, + "step": 6740 + }, + { + "epoch": 2.14, + "learning_rate": 1.542713123452423e-07, + "loss": 0.3272, + "step": 6750 + }, + { + "epoch": 2.14, + "learning_rate": 1.540060134418111e-07, + "loss": 0.3635, + "step": 6760 + }, + { + "epoch": 2.14, + "learning_rate": 1.537407145383799e-07, + "loss": 0.5525, + "step": 6770 + }, + { + "epoch": 2.14, + "learning_rate": 1.534754156349487e-07, + "loss": 0.2226, + "step": 6780 + }, + { + "epoch": 2.14, + "learning_rate": 1.5321011673151748e-07, + "loss": 0.1621, + "step": 6790 + }, + { + "epoch": 2.14, + "learning_rate": 1.5294481782808632e-07, + "loss": 0.5783, + "step": 6800 + }, + { + "epoch": 2.14, + "learning_rate": 1.526795189246551e-07, + "loss": 0.3406, + "step": 6810 + }, + { + "epoch": 2.14, + "learning_rate": 1.524142200212239e-07, + "loss": 0.2203, + "step": 6820 + }, + { + "epoch": 2.14, + "learning_rate": 1.5214892111779272e-07, + "loss": 0.4043, + "step": 6830 + }, + { + "epoch": 2.14, + "learning_rate": 1.518836222143615e-07, + "loss": 0.3353, + "step": 6840 + }, + { + "epoch": 2.15, + "learning_rate": 1.516183233109303e-07, + "loss": 0.1902, + "step": 6850 + }, + { + "epoch": 2.15, + "learning_rate": 1.513530244074991e-07, + "loss": 0.3804, + "step": 6860 + }, + { + "epoch": 2.15, + "learning_rate": 1.5108772550406792e-07, + "loss": 0.3891, + "step": 6870 + }, + { + "epoch": 2.15, + "learning_rate": 1.508224266006367e-07, + "loss": 0.2982, + "step": 6880 + }, + { + "epoch": 2.15, + "learning_rate": 1.5055712769720552e-07, + "loss": 0.319, + "step": 6890 + }, + { + "epoch": 2.15, + "learning_rate": 1.502918287937743e-07, + "loss": 0.3148, + "step": 6900 + }, + { + "epoch": 2.15, + "learning_rate": 1.500265298903431e-07, + "loss": 0.3978, + "step": 6910 + }, + { + "epoch": 2.15, + "learning_rate": 1.4976123098691192e-07, + "loss": 0.2571, + "step": 6920 + }, + { + "epoch": 2.15, + "learning_rate": 1.494959320834807e-07, + "loss": 0.4223, + "step": 6930 + }, + { + "epoch": 2.15, + "learning_rate": 1.4923063318004952e-07, + "loss": 0.4157, + "step": 6940 + }, + { + "epoch": 2.15, + "learning_rate": 1.489653342766183e-07, + "loss": 0.4624, + "step": 6950 + }, + { + "epoch": 2.15, + "learning_rate": 1.4870003537318712e-07, + "loss": 0.2294, + "step": 6960 + }, + { + "epoch": 2.15, + "learning_rate": 1.484347364697559e-07, + "loss": 0.2723, + "step": 6970 + }, + { + "epoch": 2.16, + "learning_rate": 1.4816943756632472e-07, + "loss": 0.3129, + "step": 6980 + }, + { + "epoch": 2.16, + "learning_rate": 1.479041386628935e-07, + "loss": 0.2511, + "step": 6990 + }, + { + "epoch": 2.16, + "learning_rate": 1.4763883975946233e-07, + "loss": 0.4619, + "step": 7000 + }, + { + "epoch": 2.16, + "learning_rate": 1.4737354085603112e-07, + "loss": 0.3391, + "step": 7010 + }, + { + "epoch": 2.16, + "learning_rate": 1.4710824195259993e-07, + "loss": 0.3051, + "step": 7020 + }, + { + "epoch": 2.16, + "learning_rate": 1.4684294304916872e-07, + "loss": 0.2132, + "step": 7030 + }, + { + "epoch": 2.16, + "learning_rate": 1.465776441457375e-07, + "loss": 0.3529, + "step": 7040 + }, + { + "epoch": 2.16, + "learning_rate": 1.4631234524230632e-07, + "loss": 0.411, + "step": 7050 + }, + { + "epoch": 2.16, + "learning_rate": 1.4604704633887514e-07, + "loss": 0.5373, + "step": 7060 + }, + { + "epoch": 2.16, + "learning_rate": 1.4578174743544393e-07, + "loss": 0.2255, + "step": 7070 + }, + { + "epoch": 2.16, + "learning_rate": 1.4551644853201271e-07, + "loss": 0.3849, + "step": 7080 + }, + { + "epoch": 2.16, + "learning_rate": 1.4525114962858153e-07, + "loss": 0.3697, + "step": 7090 + }, + { + "epoch": 2.16, + "learning_rate": 1.4498585072515032e-07, + "loss": 0.3, + "step": 7100 + }, + { + "epoch": 2.17, + "learning_rate": 1.4472055182171913e-07, + "loss": 0.2502, + "step": 7110 + }, + { + "epoch": 2.17, + "learning_rate": 1.4445525291828795e-07, + "loss": 0.2659, + "step": 7120 + }, + { + "epoch": 2.17, + "learning_rate": 1.4418995401485673e-07, + "loss": 0.2379, + "step": 7130 + }, + { + "epoch": 2.17, + "learning_rate": 1.4392465511142552e-07, + "loss": 0.3465, + "step": 7140 + }, + { + "epoch": 2.17, + "learning_rate": 1.4365935620799434e-07, + "loss": 0.2112, + "step": 7150 + }, + { + "epoch": 2.17, + "learning_rate": 1.4339405730456313e-07, + "loss": 0.2263, + "step": 7160 + }, + { + "epoch": 2.17, + "learning_rate": 1.4312875840113194e-07, + "loss": 0.2231, + "step": 7170 + }, + { + "epoch": 2.17, + "learning_rate": 1.4286345949770073e-07, + "loss": 0.3545, + "step": 7180 + }, + { + "epoch": 2.17, + "learning_rate": 1.4259816059426954e-07, + "loss": 0.3163, + "step": 7190 + }, + { + "epoch": 2.17, + "learning_rate": 1.4233286169083833e-07, + "loss": 0.4264, + "step": 7200 + }, + { + "epoch": 2.17, + "learning_rate": 1.4206756278740715e-07, + "loss": 0.1768, + "step": 7210 + }, + { + "epoch": 2.17, + "learning_rate": 1.4180226388397596e-07, + "loss": 0.3796, + "step": 7220 + }, + { + "epoch": 2.18, + "learning_rate": 1.4153696498054475e-07, + "loss": 0.3193, + "step": 7230 + }, + { + "epoch": 2.18, + "learning_rate": 1.4127166607711354e-07, + "loss": 0.531, + "step": 7240 + }, + { + "epoch": 2.18, + "learning_rate": 1.4100636717368235e-07, + "loss": 0.2121, + "step": 7250 + }, + { + "epoch": 2.18, + "learning_rate": 1.4074106827025114e-07, + "loss": 0.3917, + "step": 7260 + }, + { + "epoch": 2.18, + "learning_rate": 1.4047576936681996e-07, + "loss": 0.0989, + "step": 7270 + }, + { + "epoch": 2.18, + "learning_rate": 1.4021047046338874e-07, + "loss": 0.5345, + "step": 7280 + }, + { + "epoch": 2.18, + "learning_rate": 1.3994517155995753e-07, + "loss": 0.1947, + "step": 7290 + }, + { + "epoch": 2.18, + "learning_rate": 1.3967987265652635e-07, + "loss": 0.3083, + "step": 7300 + }, + { + "epoch": 2.18, + "learning_rate": 1.3941457375309516e-07, + "loss": 0.4358, + "step": 7310 + }, + { + "epoch": 2.18, + "learning_rate": 1.3914927484966395e-07, + "loss": 0.1397, + "step": 7320 + }, + { + "epoch": 2.18, + "learning_rate": 1.3888397594623274e-07, + "loss": 0.3461, + "step": 7330 + }, + { + "epoch": 2.18, + "learning_rate": 1.3861867704280155e-07, + "loss": 0.237, + "step": 7340 + }, + { + "epoch": 2.18, + "learning_rate": 1.3835337813937034e-07, + "loss": 0.3058, + "step": 7350 + }, + { + "epoch": 2.19, + "learning_rate": 1.3808807923593916e-07, + "loss": 0.2037, + "step": 7360 + }, + { + "epoch": 2.19, + "learning_rate": 1.3782278033250797e-07, + "loss": 0.3189, + "step": 7370 + }, + { + "epoch": 2.19, + "learning_rate": 1.3755748142907676e-07, + "loss": 0.2856, + "step": 7380 + }, + { + "epoch": 2.19, + "learning_rate": 1.3729218252564555e-07, + "loss": 0.3189, + "step": 7390 + }, + { + "epoch": 2.19, + "learning_rate": 1.3702688362221436e-07, + "loss": 0.27, + "step": 7400 + }, + { + "epoch": 2.19, + "learning_rate": 1.3676158471878315e-07, + "loss": 0.3278, + "step": 7410 + }, + { + "epoch": 2.19, + "learning_rate": 1.3649628581535196e-07, + "loss": 0.5432, + "step": 7420 + }, + { + "epoch": 2.19, + "learning_rate": 1.3623098691192075e-07, + "loss": 0.3994, + "step": 7430 + }, + { + "epoch": 2.19, + "learning_rate": 1.3596568800848954e-07, + "loss": 0.1706, + "step": 7440 + }, + { + "epoch": 2.19, + "learning_rate": 1.3570038910505836e-07, + "loss": 0.2554, + "step": 7450 + }, + { + "epoch": 2.19, + "learning_rate": 1.3543509020162717e-07, + "loss": 0.3261, + "step": 7460 + }, + { + "epoch": 2.19, + "learning_rate": 1.3516979129819596e-07, + "loss": 0.4152, + "step": 7470 + }, + { + "epoch": 2.2, + "learning_rate": 1.3490449239476477e-07, + "loss": 0.4484, + "step": 7480 + }, + { + "epoch": 2.2, + "learning_rate": 1.3463919349133356e-07, + "loss": 0.5551, + "step": 7490 + }, + { + "epoch": 2.2, + "learning_rate": 1.3437389458790235e-07, + "loss": 0.5414, + "step": 7500 + }, + { + "epoch": 2.2, + "learning_rate": 1.3410859568447116e-07, + "loss": 0.3728, + "step": 7510 + }, + { + "epoch": 2.2, + "learning_rate": 1.3384329678103998e-07, + "loss": 0.576, + "step": 7520 + }, + { + "epoch": 2.2, + "learning_rate": 1.3357799787760877e-07, + "loss": 0.5139, + "step": 7530 + }, + { + "epoch": 2.2, + "learning_rate": 1.3331269897417756e-07, + "loss": 0.2673, + "step": 7540 + }, + { + "epoch": 2.2, + "eval_accuracy": 0.7992943943551548, + "eval_loss": 0.4343836009502411, + "eval_runtime": 667.7031, + "eval_samples_per_second": 3.821, + "eval_steps_per_second": 0.956, + "step": 7542 + }, + { + "epoch": 3.0, + "learning_rate": 1.3304740007074637e-07, + "loss": 0.3858, + "step": 7550 + }, + { + "epoch": 3.0, + "learning_rate": 1.3278210116731516e-07, + "loss": 0.4057, + "step": 7560 + }, + { + "epoch": 3.0, + "learning_rate": 1.3251680226388397e-07, + "loss": 0.2731, + "step": 7570 + }, + { + "epoch": 3.0, + "learning_rate": 1.322515033604528e-07, + "loss": 0.3348, + "step": 7580 + }, + { + "epoch": 3.0, + "learning_rate": 1.3198620445702158e-07, + "loss": 0.1794, + "step": 7590 + }, + { + "epoch": 3.0, + "learning_rate": 1.3172090555359037e-07, + "loss": 0.2986, + "step": 7600 + }, + { + "epoch": 3.01, + "learning_rate": 1.3145560665015918e-07, + "loss": 0.3266, + "step": 7610 + }, + { + "epoch": 3.01, + "learning_rate": 1.3119030774672797e-07, + "loss": 0.3126, + "step": 7620 + }, + { + "epoch": 3.01, + "learning_rate": 1.3092500884329678e-07, + "loss": 0.3153, + "step": 7630 + }, + { + "epoch": 3.01, + "learning_rate": 1.3065970993986557e-07, + "loss": 0.3477, + "step": 7640 + }, + { + "epoch": 3.01, + "learning_rate": 1.3039441103643436e-07, + "loss": 0.3177, + "step": 7650 + }, + { + "epoch": 3.01, + "learning_rate": 1.3012911213300317e-07, + "loss": 0.2935, + "step": 7660 + }, + { + "epoch": 3.01, + "learning_rate": 1.29863813229572e-07, + "loss": 0.3827, + "step": 7670 + }, + { + "epoch": 3.01, + "learning_rate": 1.2959851432614078e-07, + "loss": 0.3086, + "step": 7680 + }, + { + "epoch": 3.01, + "learning_rate": 1.293332154227096e-07, + "loss": 0.2633, + "step": 7690 + }, + { + "epoch": 3.01, + "learning_rate": 1.2906791651927838e-07, + "loss": 0.2911, + "step": 7700 + }, + { + "epoch": 3.01, + "learning_rate": 1.2880261761584717e-07, + "loss": 0.3615, + "step": 7710 + }, + { + "epoch": 3.01, + "learning_rate": 1.2853731871241598e-07, + "loss": 0.4242, + "step": 7720 + }, + { + "epoch": 3.01, + "learning_rate": 1.282720198089848e-07, + "loss": 0.2024, + "step": 7730 + }, + { + "epoch": 3.02, + "learning_rate": 1.2800672090555359e-07, + "loss": 0.4539, + "step": 7740 + }, + { + "epoch": 3.02, + "learning_rate": 1.2774142200212237e-07, + "loss": 0.2429, + "step": 7750 + }, + { + "epoch": 3.02, + "learning_rate": 1.274761230986912e-07, + "loss": 0.2325, + "step": 7760 + }, + { + "epoch": 3.02, + "learning_rate": 1.2721082419525998e-07, + "loss": 0.4162, + "step": 7770 + }, + { + "epoch": 3.02, + "learning_rate": 1.269455252918288e-07, + "loss": 0.3089, + "step": 7780 + }, + { + "epoch": 3.02, + "learning_rate": 1.2668022638839758e-07, + "loss": 0.4546, + "step": 7790 + }, + { + "epoch": 3.02, + "learning_rate": 1.264149274849664e-07, + "loss": 0.2543, + "step": 7800 + }, + { + "epoch": 3.02, + "learning_rate": 1.2614962858153518e-07, + "loss": 0.377, + "step": 7810 + }, + { + "epoch": 3.02, + "learning_rate": 1.25884329678104e-07, + "loss": 0.3386, + "step": 7820 + }, + { + "epoch": 3.02, + "learning_rate": 1.2561903077467279e-07, + "loss": 0.3492, + "step": 7830 + }, + { + "epoch": 3.02, + "learning_rate": 1.253537318712416e-07, + "loss": 0.275, + "step": 7840 + }, + { + "epoch": 3.02, + "learning_rate": 1.250884329678104e-07, + "loss": 0.3005, + "step": 7850 + }, + { + "epoch": 3.03, + "learning_rate": 1.2482313406437918e-07, + "loss": 0.301, + "step": 7860 + }, + { + "epoch": 3.03, + "learning_rate": 1.24557835160948e-07, + "loss": 0.3116, + "step": 7870 + }, + { + "epoch": 3.03, + "learning_rate": 1.242925362575168e-07, + "loss": 0.6061, + "step": 7880 + }, + { + "epoch": 3.03, + "learning_rate": 1.240272373540856e-07, + "loss": 0.2186, + "step": 7890 + }, + { + "epoch": 3.03, + "learning_rate": 1.2376193845065438e-07, + "loss": 0.2526, + "step": 7900 + }, + { + "epoch": 3.03, + "learning_rate": 1.234966395472232e-07, + "loss": 0.3165, + "step": 7910 + }, + { + "epoch": 3.03, + "learning_rate": 1.2323134064379199e-07, + "loss": 0.1801, + "step": 7920 + }, + { + "epoch": 3.03, + "learning_rate": 1.229660417403608e-07, + "loss": 0.4456, + "step": 7930 + }, + { + "epoch": 3.03, + "learning_rate": 1.2270074283692962e-07, + "loss": 0.1817, + "step": 7940 + }, + { + "epoch": 3.03, + "learning_rate": 1.224354439334984e-07, + "loss": 0.3027, + "step": 7950 + }, + { + "epoch": 3.03, + "learning_rate": 1.221701450300672e-07, + "loss": 0.2641, + "step": 7960 + }, + { + "epoch": 3.03, + "learning_rate": 1.21904846126636e-07, + "loss": 0.1637, + "step": 7970 + }, + { + "epoch": 3.03, + "learning_rate": 1.216395472232048e-07, + "loss": 0.4759, + "step": 7980 + }, + { + "epoch": 3.04, + "learning_rate": 1.213742483197736e-07, + "loss": 0.2518, + "step": 7990 + }, + { + "epoch": 3.04, + "learning_rate": 1.211089494163424e-07, + "loss": 0.3353, + "step": 8000 + }, + { + "epoch": 3.04, + "learning_rate": 1.2084365051291121e-07, + "loss": 0.2325, + "step": 8010 + }, + { + "epoch": 3.04, + "learning_rate": 1.2057835160948e-07, + "loss": 0.2963, + "step": 8020 + }, + { + "epoch": 3.04, + "learning_rate": 1.2031305270604882e-07, + "loss": 0.3176, + "step": 8030 + }, + { + "epoch": 3.04, + "learning_rate": 1.2004775380261763e-07, + "loss": 0.2076, + "step": 8040 + }, + { + "epoch": 3.04, + "learning_rate": 1.1978245489918642e-07, + "loss": 0.3213, + "step": 8050 + }, + { + "epoch": 3.04, + "learning_rate": 1.195171559957552e-07, + "loss": 0.2378, + "step": 8060 + }, + { + "epoch": 3.04, + "learning_rate": 1.1925185709232402e-07, + "loss": 0.1516, + "step": 8070 + }, + { + "epoch": 3.04, + "learning_rate": 1.1898655818889281e-07, + "loss": 0.2366, + "step": 8080 + }, + { + "epoch": 3.04, + "learning_rate": 1.1872125928546161e-07, + "loss": 0.4461, + "step": 8090 + }, + { + "epoch": 3.04, + "learning_rate": 1.1845596038203043e-07, + "loss": 0.402, + "step": 8100 + }, + { + "epoch": 3.05, + "learning_rate": 1.1819066147859922e-07, + "loss": 0.477, + "step": 8110 + }, + { + "epoch": 3.05, + "learning_rate": 1.1792536257516802e-07, + "loss": 0.331, + "step": 8120 + }, + { + "epoch": 3.05, + "learning_rate": 1.1766006367173683e-07, + "loss": 0.1779, + "step": 8130 + }, + { + "epoch": 3.05, + "learning_rate": 1.1739476476830562e-07, + "loss": 0.2126, + "step": 8140 + }, + { + "epoch": 3.05, + "learning_rate": 1.1712946586487442e-07, + "loss": 0.3229, + "step": 8150 + }, + { + "epoch": 3.05, + "learning_rate": 1.1686416696144322e-07, + "loss": 0.3967, + "step": 8160 + }, + { + "epoch": 3.05, + "learning_rate": 1.1659886805801201e-07, + "loss": 0.12, + "step": 8170 + }, + { + "epoch": 3.05, + "learning_rate": 1.1633356915458083e-07, + "loss": 0.2832, + "step": 8180 + }, + { + "epoch": 3.05, + "learning_rate": 1.1606827025114963e-07, + "loss": 0.2127, + "step": 8190 + }, + { + "epoch": 3.05, + "learning_rate": 1.1580297134771842e-07, + "loss": 0.2145, + "step": 8200 + }, + { + "epoch": 3.05, + "learning_rate": 1.1553767244428723e-07, + "loss": 0.2717, + "step": 8210 + }, + { + "epoch": 3.05, + "learning_rate": 1.1527237354085603e-07, + "loss": 0.2418, + "step": 8220 + }, + { + "epoch": 3.05, + "learning_rate": 1.1500707463742482e-07, + "loss": 0.2614, + "step": 8230 + }, + { + "epoch": 3.06, + "learning_rate": 1.1474177573399363e-07, + "loss": 0.1934, + "step": 8240 + }, + { + "epoch": 3.06, + "learning_rate": 1.1447647683056244e-07, + "loss": 0.4718, + "step": 8250 + }, + { + "epoch": 3.06, + "learning_rate": 1.1421117792713122e-07, + "loss": 0.2791, + "step": 8260 + }, + { + "epoch": 3.06, + "learning_rate": 1.1394587902370003e-07, + "loss": 0.3744, + "step": 8270 + }, + { + "epoch": 3.06, + "learning_rate": 1.1368058012026884e-07, + "loss": 0.4266, + "step": 8280 + }, + { + "epoch": 3.06, + "learning_rate": 1.1341528121683763e-07, + "loss": 0.2921, + "step": 8290 + }, + { + "epoch": 3.06, + "learning_rate": 1.1314998231340643e-07, + "loss": 0.3229, + "step": 8300 + }, + { + "epoch": 3.06, + "learning_rate": 1.1288468340997525e-07, + "loss": 0.3329, + "step": 8310 + }, + { + "epoch": 3.06, + "learning_rate": 1.1261938450654403e-07, + "loss": 0.1501, + "step": 8320 + }, + { + "epoch": 3.06, + "learning_rate": 1.1235408560311284e-07, + "loss": 0.3098, + "step": 8330 + }, + { + "epoch": 3.06, + "learning_rate": 1.1208878669968164e-07, + "loss": 0.3795, + "step": 8340 + }, + { + "epoch": 3.06, + "learning_rate": 1.1182348779625044e-07, + "loss": 0.2139, + "step": 8350 + }, + { + "epoch": 3.07, + "learning_rate": 1.1155818889281924e-07, + "loss": 0.2172, + "step": 8360 + }, + { + "epoch": 3.07, + "learning_rate": 1.1129288998938804e-07, + "loss": 0.4199, + "step": 8370 + }, + { + "epoch": 3.07, + "learning_rate": 1.1102759108595683e-07, + "loss": 0.1432, + "step": 8380 + }, + { + "epoch": 3.07, + "learning_rate": 1.1076229218252564e-07, + "loss": 0.2769, + "step": 8390 + }, + { + "epoch": 3.07, + "learning_rate": 1.1049699327909445e-07, + "loss": 0.4188, + "step": 8400 + }, + { + "epoch": 3.07, + "learning_rate": 1.1023169437566323e-07, + "loss": 0.5459, + "step": 8410 + }, + { + "epoch": 3.07, + "learning_rate": 1.0996639547223205e-07, + "loss": 0.5241, + "step": 8420 + }, + { + "epoch": 3.07, + "learning_rate": 1.0970109656880085e-07, + "loss": 0.573, + "step": 8430 + }, + { + "epoch": 3.07, + "learning_rate": 1.0943579766536964e-07, + "loss": 0.2892, + "step": 8440 + }, + { + "epoch": 3.07, + "learning_rate": 1.0917049876193844e-07, + "loss": 0.2946, + "step": 8450 + }, + { + "epoch": 3.07, + "learning_rate": 1.0890519985850725e-07, + "loss": 0.2701, + "step": 8460 + }, + { + "epoch": 3.07, + "learning_rate": 1.0863990095507604e-07, + "loss": 0.3341, + "step": 8470 + }, + { + "epoch": 3.07, + "learning_rate": 1.0837460205164484e-07, + "loss": 0.347, + "step": 8480 + }, + { + "epoch": 3.08, + "learning_rate": 1.0810930314821366e-07, + "loss": 0.3488, + "step": 8490 + }, + { + "epoch": 3.08, + "learning_rate": 1.0784400424478245e-07, + "loss": 0.2516, + "step": 8500 + }, + { + "epoch": 3.08, + "learning_rate": 1.0757870534135125e-07, + "loss": 0.4856, + "step": 8510 + }, + { + "epoch": 3.08, + "learning_rate": 1.0731340643792005e-07, + "loss": 0.3262, + "step": 8520 + }, + { + "epoch": 3.08, + "learning_rate": 1.0704810753448885e-07, + "loss": 0.4792, + "step": 8530 + }, + { + "epoch": 3.08, + "learning_rate": 1.0678280863105765e-07, + "loss": 0.3977, + "step": 8540 + }, + { + "epoch": 3.08, + "learning_rate": 1.0651750972762645e-07, + "loss": 0.247, + "step": 8550 + }, + { + "epoch": 3.08, + "learning_rate": 1.0625221082419524e-07, + "loss": 0.5944, + "step": 8560 + }, + { + "epoch": 3.08, + "learning_rate": 1.0598691192076406e-07, + "loss": 0.4538, + "step": 8570 + }, + { + "epoch": 3.08, + "learning_rate": 1.0572161301733286e-07, + "loss": 0.322, + "step": 8580 + }, + { + "epoch": 3.08, + "learning_rate": 1.0545631411390165e-07, + "loss": 0.8246, + "step": 8590 + }, + { + "epoch": 3.08, + "learning_rate": 1.0519101521047046e-07, + "loss": 0.2921, + "step": 8600 + }, + { + "epoch": 3.08, + "learning_rate": 1.0492571630703926e-07, + "loss": 0.2552, + "step": 8610 + }, + { + "epoch": 3.09, + "learning_rate": 1.0466041740360805e-07, + "loss": 0.2207, + "step": 8620 + }, + { + "epoch": 3.09, + "learning_rate": 1.0439511850017685e-07, + "loss": 0.136, + "step": 8630 + }, + { + "epoch": 3.09, + "learning_rate": 1.0412981959674567e-07, + "loss": 0.5574, + "step": 8640 + }, + { + "epoch": 3.09, + "learning_rate": 1.0386452069331446e-07, + "loss": 0.2432, + "step": 8650 + }, + { + "epoch": 3.09, + "learning_rate": 1.0359922178988326e-07, + "loss": 0.4907, + "step": 8660 + }, + { + "epoch": 3.09, + "learning_rate": 1.0333392288645207e-07, + "loss": 0.2984, + "step": 8670 + }, + { + "epoch": 3.09, + "learning_rate": 1.0306862398302086e-07, + "loss": 0.3477, + "step": 8680 + }, + { + "epoch": 3.09, + "learning_rate": 1.0280332507958966e-07, + "loss": 0.4005, + "step": 8690 + }, + { + "epoch": 3.09, + "learning_rate": 1.0253802617615848e-07, + "loss": 0.4083, + "step": 8700 + }, + { + "epoch": 3.09, + "learning_rate": 1.0227272727272727e-07, + "loss": 0.3129, + "step": 8710 + }, + { + "epoch": 3.09, + "learning_rate": 1.0200742836929607e-07, + "loss": 0.4127, + "step": 8720 + }, + { + "epoch": 3.09, + "learning_rate": 1.0174212946586487e-07, + "loss": 0.1239, + "step": 8730 + }, + { + "epoch": 3.1, + "learning_rate": 1.0147683056243366e-07, + "loss": 0.1863, + "step": 8740 + }, + { + "epoch": 3.1, + "learning_rate": 1.0121153165900247e-07, + "loss": 0.3504, + "step": 8750 + }, + { + "epoch": 3.1, + "learning_rate": 1.0094623275557127e-07, + "loss": 0.3059, + "step": 8760 + }, + { + "epoch": 3.1, + "learning_rate": 1.0068093385214006e-07, + "loss": 0.1524, + "step": 8770 + }, + { + "epoch": 3.1, + "learning_rate": 1.0041563494870888e-07, + "loss": 0.38, + "step": 8780 + }, + { + "epoch": 3.1, + "learning_rate": 1.0015033604527768e-07, + "loss": 0.3079, + "step": 8790 + }, + { + "epoch": 3.1, + "learning_rate": 9.988503714184647e-08, + "loss": 0.3288, + "step": 8800 + }, + { + "epoch": 3.1, + "learning_rate": 9.961973823841527e-08, + "loss": 0.554, + "step": 8810 + }, + { + "epoch": 3.1, + "learning_rate": 9.935443933498408e-08, + "loss": 0.2589, + "step": 8820 + }, + { + "epoch": 3.1, + "learning_rate": 9.908914043155288e-08, + "loss": 0.2241, + "step": 8830 + }, + { + "epoch": 3.1, + "learning_rate": 9.882384152812167e-08, + "loss": 0.291, + "step": 8840 + }, + { + "epoch": 3.1, + "learning_rate": 9.855854262469049e-08, + "loss": 0.1931, + "step": 8850 + }, + { + "epoch": 3.1, + "learning_rate": 9.829324372125929e-08, + "loss": 0.2891, + "step": 8860 + }, + { + "epoch": 3.11, + "learning_rate": 9.802794481782808e-08, + "loss": 0.301, + "step": 8870 + }, + { + "epoch": 3.11, + "learning_rate": 9.776264591439689e-08, + "loss": 0.1596, + "step": 8880 + }, + { + "epoch": 3.11, + "learning_rate": 9.749734701096569e-08, + "loss": 0.261, + "step": 8890 + }, + { + "epoch": 3.11, + "learning_rate": 9.723204810753448e-08, + "loss": 0.2761, + "step": 8900 + }, + { + "epoch": 3.11, + "learning_rate": 9.696674920410328e-08, + "loss": 0.2916, + "step": 8910 + }, + { + "epoch": 3.11, + "learning_rate": 9.67014503006721e-08, + "loss": 0.4751, + "step": 8920 + }, + { + "epoch": 3.11, + "learning_rate": 9.643615139724089e-08, + "loss": 0.2561, + "step": 8930 + }, + { + "epoch": 3.11, + "learning_rate": 9.617085249380969e-08, + "loss": 0.1122, + "step": 8940 + }, + { + "epoch": 3.11, + "learning_rate": 9.59055535903785e-08, + "loss": 0.1563, + "step": 8950 + }, + { + "epoch": 3.11, + "learning_rate": 9.564025468694729e-08, + "loss": 0.2828, + "step": 8960 + }, + { + "epoch": 3.11, + "learning_rate": 9.537495578351609e-08, + "loss": 0.3116, + "step": 8970 + }, + { + "epoch": 3.11, + "learning_rate": 9.510965688008489e-08, + "loss": 0.1198, + "step": 8980 + }, + { + "epoch": 3.12, + "learning_rate": 9.48443579766537e-08, + "loss": 0.2329, + "step": 8990 + }, + { + "epoch": 3.12, + "learning_rate": 9.45790590732225e-08, + "loss": 0.2435, + "step": 9000 + }, + { + "epoch": 3.12, + "learning_rate": 9.43137601697913e-08, + "loss": 0.2814, + "step": 9010 + }, + { + "epoch": 3.12, + "learning_rate": 9.404846126636009e-08, + "loss": 0.4588, + "step": 9020 + }, + { + "epoch": 3.12, + "learning_rate": 9.37831623629289e-08, + "loss": 0.3259, + "step": 9030 + }, + { + "epoch": 3.12, + "learning_rate": 9.35178634594977e-08, + "loss": 0.2281, + "step": 9040 + }, + { + "epoch": 3.12, + "learning_rate": 9.325256455606649e-08, + "loss": 0.1325, + "step": 9050 + }, + { + "epoch": 3.12, + "learning_rate": 9.29872656526353e-08, + "loss": 0.1756, + "step": 9060 + }, + { + "epoch": 3.12, + "learning_rate": 9.27219667492041e-08, + "loss": 0.4611, + "step": 9070 + }, + { + "epoch": 3.12, + "learning_rate": 9.24566678457729e-08, + "loss": 0.3821, + "step": 9080 + }, + { + "epoch": 3.12, + "learning_rate": 9.21913689423417e-08, + "loss": 0.4035, + "step": 9090 + }, + { + "epoch": 3.12, + "learning_rate": 9.192607003891051e-08, + "loss": 0.2634, + "step": 9100 + }, + { + "epoch": 3.12, + "learning_rate": 9.16607711354793e-08, + "loss": 0.5165, + "step": 9110 + }, + { + "epoch": 3.13, + "learning_rate": 9.13954722320481e-08, + "loss": 0.2734, + "step": 9120 + }, + { + "epoch": 3.13, + "learning_rate": 9.113017332861692e-08, + "loss": 0.3244, + "step": 9130 + }, + { + "epoch": 3.13, + "learning_rate": 9.08648744251857e-08, + "loss": 0.5966, + "step": 9140 + }, + { + "epoch": 3.13, + "learning_rate": 9.05995755217545e-08, + "loss": 0.2808, + "step": 9150 + }, + { + "epoch": 3.13, + "learning_rate": 9.033427661832331e-08, + "loss": 0.0861, + "step": 9160 + }, + { + "epoch": 3.13, + "learning_rate": 9.006897771489211e-08, + "loss": 0.5532, + "step": 9170 + }, + { + "epoch": 3.13, + "learning_rate": 8.980367881146091e-08, + "loss": 0.1335, + "step": 9180 + }, + { + "epoch": 3.13, + "learning_rate": 8.953837990802971e-08, + "loss": 0.2124, + "step": 9190 + }, + { + "epoch": 3.13, + "learning_rate": 8.92730810045985e-08, + "loss": 0.3196, + "step": 9200 + }, + { + "epoch": 3.13, + "learning_rate": 8.900778210116731e-08, + "loss": 0.3537, + "step": 9210 + }, + { + "epoch": 3.13, + "learning_rate": 8.874248319773612e-08, + "loss": 0.2052, + "step": 9220 + }, + { + "epoch": 3.13, + "learning_rate": 8.84771842943049e-08, + "loss": 0.2914, + "step": 9230 + }, + { + "epoch": 3.14, + "learning_rate": 8.821188539087372e-08, + "loss": 0.201, + "step": 9240 + }, + { + "epoch": 3.14, + "learning_rate": 8.794658648744252e-08, + "loss": 0.5735, + "step": 9250 + }, + { + "epoch": 3.14, + "learning_rate": 8.768128758401131e-08, + "loss": 0.3856, + "step": 9260 + }, + { + "epoch": 3.14, + "learning_rate": 8.741598868058011e-08, + "loss": 0.3567, + "step": 9270 + }, + { + "epoch": 3.14, + "learning_rate": 8.715068977714892e-08, + "loss": 0.1276, + "step": 9280 + }, + { + "epoch": 3.14, + "learning_rate": 8.688539087371771e-08, + "loss": 0.2726, + "step": 9290 + }, + { + "epoch": 3.14, + "learning_rate": 8.662009197028651e-08, + "loss": 0.1824, + "step": 9300 + }, + { + "epoch": 3.14, + "learning_rate": 8.635479306685533e-08, + "loss": 0.2436, + "step": 9310 + }, + { + "epoch": 3.14, + "learning_rate": 8.608949416342412e-08, + "loss": 0.1757, + "step": 9320 + }, + { + "epoch": 3.14, + "learning_rate": 8.582419525999292e-08, + "loss": 0.115, + "step": 9330 + }, + { + "epoch": 3.14, + "learning_rate": 8.555889635656172e-08, + "loss": 0.3133, + "step": 9340 + }, + { + "epoch": 3.14, + "learning_rate": 8.529359745313052e-08, + "loss": 0.3231, + "step": 9350 + }, + { + "epoch": 3.14, + "learning_rate": 8.502829854969932e-08, + "loss": 0.3843, + "step": 9360 + }, + { + "epoch": 3.15, + "learning_rate": 8.476299964626813e-08, + "loss": 0.3218, + "step": 9370 + }, + { + "epoch": 3.15, + "learning_rate": 8.449770074283691e-08, + "loss": 0.2972, + "step": 9380 + }, + { + "epoch": 3.15, + "learning_rate": 8.423240183940573e-08, + "loss": 0.1626, + "step": 9390 + }, + { + "epoch": 3.15, + "learning_rate": 8.396710293597453e-08, + "loss": 0.3293, + "step": 9400 + }, + { + "epoch": 3.15, + "learning_rate": 8.370180403254332e-08, + "loss": 0.1546, + "step": 9410 + }, + { + "epoch": 3.15, + "learning_rate": 8.343650512911213e-08, + "loss": 0.2222, + "step": 9420 + }, + { + "epoch": 3.15, + "learning_rate": 8.317120622568093e-08, + "loss": 0.5815, + "step": 9430 + }, + { + "epoch": 3.15, + "learning_rate": 8.290590732224972e-08, + "loss": 0.4438, + "step": 9440 + }, + { + "epoch": 3.15, + "learning_rate": 8.264060841881852e-08, + "loss": 0.1896, + "step": 9450 + }, + { + "epoch": 3.15, + "learning_rate": 8.237530951538734e-08, + "loss": 0.5805, + "step": 9460 + }, + { + "epoch": 3.15, + "learning_rate": 8.211001061195613e-08, + "loss": 0.5619, + "step": 9470 + }, + { + "epoch": 3.15, + "learning_rate": 8.184471170852493e-08, + "loss": 0.256, + "step": 9480 + }, + { + "epoch": 3.16, + "learning_rate": 8.157941280509374e-08, + "loss": 0.1082, + "step": 9490 + }, + { + "epoch": 3.16, + "learning_rate": 8.131411390166253e-08, + "loss": 0.4024, + "step": 9500 + }, + { + "epoch": 3.16, + "learning_rate": 8.104881499823133e-08, + "loss": 0.4838, + "step": 9510 + }, + { + "epoch": 3.16, + "learning_rate": 8.078351609480015e-08, + "loss": 0.4411, + "step": 9520 + }, + { + "epoch": 3.16, + "learning_rate": 8.051821719136894e-08, + "loss": 0.4502, + "step": 9530 + }, + { + "epoch": 3.16, + "learning_rate": 8.025291828793774e-08, + "loss": 0.3072, + "step": 9540 + }, + { + "epoch": 3.16, + "learning_rate": 7.998761938450654e-08, + "loss": 0.0834, + "step": 9550 + }, + { + "epoch": 3.16, + "learning_rate": 7.972232048107533e-08, + "loss": 0.2351, + "step": 9560 + }, + { + "epoch": 3.16, + "learning_rate": 7.945702157764414e-08, + "loss": 0.293, + "step": 9570 + }, + { + "epoch": 3.16, + "learning_rate": 7.919172267421294e-08, + "loss": 0.4146, + "step": 9580 + }, + { + "epoch": 3.16, + "learning_rate": 7.892642377078173e-08, + "loss": 0.2291, + "step": 9590 + }, + { + "epoch": 3.16, + "learning_rate": 7.866112486735055e-08, + "loss": 0.1389, + "step": 9600 + }, + { + "epoch": 3.16, + "learning_rate": 7.839582596391935e-08, + "loss": 0.2594, + "step": 9610 + }, + { + "epoch": 3.17, + "learning_rate": 7.813052706048814e-08, + "loss": 0.5084, + "step": 9620 + }, + { + "epoch": 3.17, + "learning_rate": 7.786522815705695e-08, + "loss": 0.4067, + "step": 9630 + }, + { + "epoch": 3.17, + "learning_rate": 7.759992925362575e-08, + "loss": 0.3945, + "step": 9640 + }, + { + "epoch": 3.17, + "learning_rate": 7.733463035019454e-08, + "loss": 0.3778, + "step": 9650 + }, + { + "epoch": 3.17, + "learning_rate": 7.706933144676334e-08, + "loss": 0.2721, + "step": 9660 + }, + { + "epoch": 3.17, + "learning_rate": 7.680403254333216e-08, + "loss": 0.304, + "step": 9670 + }, + { + "epoch": 3.17, + "learning_rate": 7.653873363990096e-08, + "loss": 0.2496, + "step": 9680 + }, + { + "epoch": 3.17, + "learning_rate": 7.627343473646975e-08, + "loss": 0.3475, + "step": 9690 + }, + { + "epoch": 3.17, + "learning_rate": 7.600813583303856e-08, + "loss": 0.2106, + "step": 9700 + }, + { + "epoch": 3.17, + "learning_rate": 7.574283692960736e-08, + "loss": 0.4637, + "step": 9710 + }, + { + "epoch": 3.17, + "learning_rate": 7.547753802617615e-08, + "loss": 0.2955, + "step": 9720 + }, + { + "epoch": 3.17, + "learning_rate": 7.521223912274495e-08, + "loss": 0.4538, + "step": 9730 + }, + { + "epoch": 3.17, + "learning_rate": 7.494694021931375e-08, + "loss": 0.4784, + "step": 9740 + }, + { + "epoch": 3.18, + "learning_rate": 7.468164131588256e-08, + "loss": 0.2853, + "step": 9750 + }, + { + "epoch": 3.18, + "learning_rate": 7.441634241245136e-08, + "loss": 0.1619, + "step": 9760 + }, + { + "epoch": 3.18, + "learning_rate": 7.415104350902016e-08, + "loss": 0.4363, + "step": 9770 + }, + { + "epoch": 3.18, + "learning_rate": 7.388574460558896e-08, + "loss": 0.3628, + "step": 9780 + }, + { + "epoch": 3.18, + "learning_rate": 7.362044570215776e-08, + "loss": 0.5235, + "step": 9790 + }, + { + "epoch": 3.18, + "learning_rate": 7.335514679872656e-08, + "loss": 0.276, + "step": 9800 + }, + { + "epoch": 3.18, + "learning_rate": 7.308984789529536e-08, + "loss": 0.3532, + "step": 9810 + }, + { + "epoch": 3.18, + "learning_rate": 7.282454899186417e-08, + "loss": 0.3284, + "step": 9820 + }, + { + "epoch": 3.18, + "learning_rate": 7.255925008843297e-08, + "loss": 0.305, + "step": 9830 + }, + { + "epoch": 3.18, + "learning_rate": 7.229395118500176e-08, + "loss": 0.1895, + "step": 9840 + }, + { + "epoch": 3.18, + "learning_rate": 7.202865228157057e-08, + "loss": 0.6018, + "step": 9850 + }, + { + "epoch": 3.18, + "learning_rate": 7.176335337813937e-08, + "loss": 0.277, + "step": 9860 + }, + { + "epoch": 3.19, + "learning_rate": 7.149805447470817e-08, + "loss": 0.1618, + "step": 9870 + }, + { + "epoch": 3.19, + "learning_rate": 7.123275557127698e-08, + "loss": 0.1706, + "step": 9880 + }, + { + "epoch": 3.19, + "learning_rate": 7.096745666784576e-08, + "loss": 0.1546, + "step": 9890 + }, + { + "epoch": 3.19, + "learning_rate": 7.070215776441458e-08, + "loss": 0.2086, + "step": 9900 + }, + { + "epoch": 3.19, + "learning_rate": 7.043685886098337e-08, + "loss": 0.2052, + "step": 9910 + }, + { + "epoch": 3.19, + "learning_rate": 7.017155995755217e-08, + "loss": 0.2529, + "step": 9920 + }, + { + "epoch": 3.19, + "learning_rate": 6.990626105412098e-08, + "loss": 0.2568, + "step": 9930 + }, + { + "epoch": 3.19, + "learning_rate": 6.964096215068977e-08, + "loss": 0.5543, + "step": 9940 + }, + { + "epoch": 3.19, + "learning_rate": 6.937566324725857e-08, + "loss": 0.4673, + "step": 9950 + }, + { + "epoch": 3.19, + "learning_rate": 6.911036434382737e-08, + "loss": 0.2104, + "step": 9960 + }, + { + "epoch": 3.19, + "learning_rate": 6.884506544039618e-08, + "loss": 0.1235, + "step": 9970 + }, + { + "epoch": 3.19, + "learning_rate": 6.857976653696498e-08, + "loss": 0.3148, + "step": 9980 + }, + { + "epoch": 3.19, + "learning_rate": 6.831446763353378e-08, + "loss": 0.2482, + "step": 9990 + }, + { + "epoch": 3.2, + "learning_rate": 6.804916873010258e-08, + "loss": 0.2479, + "step": 10000 + }, + { + "epoch": 3.2, + "learning_rate": 6.778386982667138e-08, + "loss": 0.4603, + "step": 10010 + }, + { + "epoch": 3.2, + "learning_rate": 6.751857092324018e-08, + "loss": 0.181, + "step": 10020 + }, + { + "epoch": 3.2, + "learning_rate": 6.725327201980898e-08, + "loss": 0.0648, + "step": 10030 + }, + { + "epoch": 3.2, + "learning_rate": 6.698797311637779e-08, + "loss": 0.1411, + "step": 10040 + }, + { + "epoch": 3.2, + "learning_rate": 6.672267421294659e-08, + "loss": 0.3185, + "step": 10050 + }, + { + "epoch": 3.2, + "eval_accuracy": 0.8200705605644845, + "eval_loss": 0.4328227639198303, + "eval_runtime": 675.1569, + "eval_samples_per_second": 3.778, + "eval_steps_per_second": 0.945, + "step": 10056 + }, + { + "epoch": 4.0, + "learning_rate": 6.645737530951539e-08, + "loss": 0.3281, + "step": 10060 + }, + { + "epoch": 4.0, + "learning_rate": 6.619207640608418e-08, + "loss": 0.1367, + "step": 10070 + }, + { + "epoch": 4.0, + "learning_rate": 6.592677750265299e-08, + "loss": 0.1347, + "step": 10080 + }, + { + "epoch": 4.0, + "learning_rate": 6.566147859922178e-08, + "loss": 0.2287, + "step": 10090 + }, + { + "epoch": 4.0, + "learning_rate": 6.539617969579058e-08, + "loss": 0.079, + "step": 10100 + }, + { + "epoch": 4.0, + "learning_rate": 6.51308807923594e-08, + "loss": 0.4218, + "step": 10110 + }, + { + "epoch": 4.01, + "learning_rate": 6.486558188892818e-08, + "loss": 0.1522, + "step": 10120 + }, + { + "epoch": 4.01, + "learning_rate": 6.460028298549699e-08, + "loss": 0.2411, + "step": 10130 + }, + { + "epoch": 4.01, + "learning_rate": 6.433498408206579e-08, + "loss": 0.2314, + "step": 10140 + }, + { + "epoch": 4.01, + "learning_rate": 6.406968517863459e-08, + "loss": 0.4938, + "step": 10150 + }, + { + "epoch": 4.01, + "learning_rate": 6.380438627520339e-08, + "loss": 0.1713, + "step": 10160 + }, + { + "epoch": 4.01, + "learning_rate": 6.353908737177219e-08, + "loss": 0.3614, + "step": 10170 + }, + { + "epoch": 4.01, + "learning_rate": 6.3273788468341e-08, + "loss": 0.2386, + "step": 10180 + }, + { + "epoch": 4.01, + "learning_rate": 6.30084895649098e-08, + "loss": 0.4071, + "step": 10190 + }, + { + "epoch": 4.01, + "learning_rate": 6.27431906614786e-08, + "loss": 0.3515, + "step": 10200 + }, + { + "epoch": 4.01, + "learning_rate": 6.24778917580474e-08, + "loss": 0.3182, + "step": 10210 + }, + { + "epoch": 4.01, + "learning_rate": 6.22125928546162e-08, + "loss": 0.395, + "step": 10220 + }, + { + "epoch": 4.01, + "learning_rate": 6.1947293951185e-08, + "loss": 0.0892, + "step": 10230 + }, + { + "epoch": 4.01, + "learning_rate": 6.16819950477538e-08, + "loss": 0.4639, + "step": 10240 + }, + { + "epoch": 4.02, + "learning_rate": 6.141669614432259e-08, + "loss": 0.2954, + "step": 10250 + }, + { + "epoch": 4.02, + "learning_rate": 6.11513972408914e-08, + "loss": 0.3138, + "step": 10260 + }, + { + "epoch": 4.02, + "learning_rate": 6.088609833746021e-08, + "loss": 0.5433, + "step": 10270 + }, + { + "epoch": 4.02, + "learning_rate": 6.062079943402901e-08, + "loss": 0.2787, + "step": 10280 + }, + { + "epoch": 4.02, + "learning_rate": 6.035550053059781e-08, + "loss": 0.3296, + "step": 10290 + }, + { + "epoch": 4.02, + "learning_rate": 6.00902016271666e-08, + "loss": 0.1484, + "step": 10300 + }, + { + "epoch": 4.02, + "learning_rate": 5.982490272373541e-08, + "loss": 0.2677, + "step": 10310 + }, + { + "epoch": 4.02, + "learning_rate": 5.955960382030421e-08, + "loss": 0.2695, + "step": 10320 + }, + { + "epoch": 4.02, + "learning_rate": 5.9294304916873e-08, + "loss": 0.3003, + "step": 10330 + }, + { + "epoch": 4.02, + "learning_rate": 5.902900601344181e-08, + "loss": 0.1844, + "step": 10340 + }, + { + "epoch": 4.02, + "learning_rate": 5.876370711001061e-08, + "loss": 0.3414, + "step": 10350 + }, + { + "epoch": 4.02, + "learning_rate": 5.849840820657941e-08, + "loss": 0.2815, + "step": 10360 + }, + { + "epoch": 4.02, + "learning_rate": 5.8233109303148216e-08, + "loss": 0.2292, + "step": 10370 + }, + { + "epoch": 4.03, + "learning_rate": 5.796781039971701e-08, + "loss": 0.3515, + "step": 10380 + }, + { + "epoch": 4.03, + "learning_rate": 5.770251149628581e-08, + "loss": 0.3803, + "step": 10390 + }, + { + "epoch": 4.03, + "learning_rate": 5.7437212592854614e-08, + "loss": 0.29, + "step": 10400 + }, + { + "epoch": 4.03, + "learning_rate": 5.7171913689423415e-08, + "loss": 0.3232, + "step": 10410 + }, + { + "epoch": 4.03, + "learning_rate": 5.690661478599221e-08, + "loss": 0.093, + "step": 10420 + }, + { + "epoch": 4.03, + "learning_rate": 5.664131588256102e-08, + "loss": 0.2088, + "step": 10430 + }, + { + "epoch": 4.03, + "learning_rate": 5.637601697912982e-08, + "loss": 0.487, + "step": 10440 + }, + { + "epoch": 4.03, + "learning_rate": 5.6110718075698615e-08, + "loss": 0.2051, + "step": 10450 + }, + { + "epoch": 4.03, + "learning_rate": 5.584541917226742e-08, + "loss": 0.5831, + "step": 10460 + }, + { + "epoch": 4.03, + "learning_rate": 5.558012026883622e-08, + "loss": 0.4019, + "step": 10470 + }, + { + "epoch": 4.03, + "learning_rate": 5.531482136540502e-08, + "loss": 0.3914, + "step": 10480 + }, + { + "epoch": 4.03, + "learning_rate": 5.504952246197383e-08, + "loss": 0.1919, + "step": 10490 + }, + { + "epoch": 4.04, + "learning_rate": 5.478422355854262e-08, + "loss": 0.2466, + "step": 10500 + }, + { + "epoch": 4.04, + "learning_rate": 5.451892465511142e-08, + "loss": 0.2837, + "step": 10510 + }, + { + "epoch": 4.04, + "learning_rate": 5.4253625751680225e-08, + "loss": 0.3293, + "step": 10520 + }, + { + "epoch": 4.04, + "learning_rate": 5.3988326848249027e-08, + "loss": 0.2261, + "step": 10530 + }, + { + "epoch": 4.04, + "learning_rate": 5.372302794481782e-08, + "loss": 0.4414, + "step": 10540 + }, + { + "epoch": 4.04, + "learning_rate": 5.345772904138663e-08, + "loss": 0.4747, + "step": 10550 + }, + { + "epoch": 4.04, + "learning_rate": 5.3192430137955425e-08, + "loss": 0.297, + "step": 10560 + }, + { + "epoch": 4.04, + "learning_rate": 5.2927131234524226e-08, + "loss": 0.4797, + "step": 10570 + }, + { + "epoch": 4.04, + "learning_rate": 5.2661832331093034e-08, + "loss": 0.0799, + "step": 10580 + }, + { + "epoch": 4.04, + "learning_rate": 5.239653342766183e-08, + "loss": 0.1772, + "step": 10590 + }, + { + "epoch": 4.04, + "learning_rate": 5.213123452423063e-08, + "loss": 0.204, + "step": 10600 + }, + { + "epoch": 4.04, + "learning_rate": 5.186593562079943e-08, + "loss": 0.2605, + "step": 10610 + }, + { + "epoch": 4.04, + "learning_rate": 5.1600636717368234e-08, + "loss": 0.2314, + "step": 10620 + }, + { + "epoch": 4.05, + "learning_rate": 5.133533781393703e-08, + "loss": 0.383, + "step": 10630 + }, + { + "epoch": 4.05, + "learning_rate": 5.1070038910505837e-08, + "loss": 0.3802, + "step": 10640 + }, + { + "epoch": 4.05, + "learning_rate": 5.080474000707463e-08, + "loss": 0.155, + "step": 10650 + }, + { + "epoch": 4.05, + "learning_rate": 5.053944110364343e-08, + "loss": 0.4906, + "step": 10660 + }, + { + "epoch": 4.05, + "learning_rate": 5.027414220021224e-08, + "loss": 0.3776, + "step": 10670 + }, + { + "epoch": 4.05, + "learning_rate": 5.0008843296781036e-08, + "loss": 0.5581, + "step": 10680 + }, + { + "epoch": 4.05, + "learning_rate": 4.974354439334984e-08, + "loss": 0.3237, + "step": 10690 + }, + { + "epoch": 4.05, + "learning_rate": 4.947824548991864e-08, + "loss": 0.3431, + "step": 10700 + }, + { + "epoch": 4.05, + "learning_rate": 4.921294658648744e-08, + "loss": 0.241, + "step": 10710 + }, + { + "epoch": 4.05, + "learning_rate": 4.894764768305624e-08, + "loss": 0.389, + "step": 10720 + }, + { + "epoch": 4.05, + "learning_rate": 4.8682348779625043e-08, + "loss": 0.4295, + "step": 10730 + }, + { + "epoch": 4.05, + "learning_rate": 4.841704987619384e-08, + "loss": 0.2456, + "step": 10740 + }, + { + "epoch": 4.06, + "learning_rate": 4.8151750972762646e-08, + "loss": 0.5222, + "step": 10750 + }, + { + "epoch": 4.06, + "learning_rate": 4.788645206933145e-08, + "loss": 0.3246, + "step": 10760 + }, + { + "epoch": 4.06, + "learning_rate": 4.762115316590024e-08, + "loss": 0.4138, + "step": 10770 + }, + { + "epoch": 4.06, + "learning_rate": 4.735585426246905e-08, + "loss": 0.2455, + "step": 10780 + }, + { + "epoch": 4.06, + "learning_rate": 4.7090555359037846e-08, + "loss": 0.1867, + "step": 10790 + }, + { + "epoch": 4.06, + "learning_rate": 4.682525645560665e-08, + "loss": 0.3166, + "step": 10800 + }, + { + "epoch": 4.06, + "learning_rate": 4.6559957552175455e-08, + "loss": 0.1877, + "step": 10810 + }, + { + "epoch": 4.06, + "learning_rate": 4.629465864874425e-08, + "loss": 0.4553, + "step": 10820 + }, + { + "epoch": 4.06, + "learning_rate": 4.6029359745313045e-08, + "loss": 0.1521, + "step": 10830 + }, + { + "epoch": 4.06, + "learning_rate": 4.5764060841881853e-08, + "loss": 0.2166, + "step": 10840 + }, + { + "epoch": 4.06, + "learning_rate": 4.5498761938450655e-08, + "loss": 0.1694, + "step": 10850 + }, + { + "epoch": 4.06, + "learning_rate": 4.523346303501945e-08, + "loss": 0.1315, + "step": 10860 + }, + { + "epoch": 4.06, + "learning_rate": 4.496816413158826e-08, + "loss": 0.4086, + "step": 10870 + }, + { + "epoch": 4.07, + "learning_rate": 4.470286522815705e-08, + "loss": 0.3522, + "step": 10880 + }, + { + "epoch": 4.07, + "learning_rate": 4.4437566324725854e-08, + "loss": 0.1301, + "step": 10890 + }, + { + "epoch": 4.07, + "learning_rate": 4.417226742129466e-08, + "loss": 0.1597, + "step": 10900 + }, + { + "epoch": 4.07, + "learning_rate": 4.390696851786346e-08, + "loss": 0.4818, + "step": 10910 + }, + { + "epoch": 4.07, + "learning_rate": 4.364166961443226e-08, + "loss": 0.2074, + "step": 10920 + }, + { + "epoch": 4.07, + "learning_rate": 4.337637071100106e-08, + "loss": 0.0484, + "step": 10930 + }, + { + "epoch": 4.07, + "learning_rate": 4.311107180756986e-08, + "loss": 0.1093, + "step": 10940 + }, + { + "epoch": 4.07, + "learning_rate": 4.2845772904138657e-08, + "loss": 0.3121, + "step": 10950 + }, + { + "epoch": 4.07, + "learning_rate": 4.2580474000707465e-08, + "loss": 0.1872, + "step": 10960 + }, + { + "epoch": 4.07, + "learning_rate": 4.231517509727626e-08, + "loss": 0.4645, + "step": 10970 + }, + { + "epoch": 4.07, + "learning_rate": 4.204987619384506e-08, + "loss": 0.2252, + "step": 10980 + }, + { + "epoch": 4.07, + "learning_rate": 4.178457729041387e-08, + "loss": 0.3651, + "step": 10990 + }, + { + "epoch": 4.08, + "learning_rate": 4.1519278386982664e-08, + "loss": 0.272, + "step": 11000 + }, + { + "epoch": 4.08, + "learning_rate": 4.1253979483551466e-08, + "loss": 0.408, + "step": 11010 + }, + { + "epoch": 4.08, + "learning_rate": 4.098868058012027e-08, + "loss": 0.2049, + "step": 11020 + }, + { + "epoch": 4.08, + "learning_rate": 4.072338167668907e-08, + "loss": 0.1285, + "step": 11030 + }, + { + "epoch": 4.08, + "learning_rate": 4.0458082773257864e-08, + "loss": 0.4743, + "step": 11040 + }, + { + "epoch": 4.08, + "learning_rate": 4.019278386982667e-08, + "loss": 0.2603, + "step": 11050 + }, + { + "epoch": 4.08, + "learning_rate": 3.9927484966395467e-08, + "loss": 0.5623, + "step": 11060 + }, + { + "epoch": 4.08, + "learning_rate": 3.966218606296427e-08, + "loss": 0.2478, + "step": 11070 + }, + { + "epoch": 4.08, + "learning_rate": 3.9396887159533076e-08, + "loss": 0.2938, + "step": 11080 + }, + { + "epoch": 4.08, + "learning_rate": 3.913158825610187e-08, + "loss": 0.3001, + "step": 11090 + }, + { + "epoch": 4.08, + "learning_rate": 3.886628935267067e-08, + "loss": 0.1743, + "step": 11100 + }, + { + "epoch": 4.08, + "learning_rate": 3.8600990449239474e-08, + "loss": 0.3494, + "step": 11110 + }, + { + "epoch": 4.08, + "learning_rate": 3.8335691545808276e-08, + "loss": 0.3701, + "step": 11120 + }, + { + "epoch": 4.09, + "learning_rate": 3.8070392642377084e-08, + "loss": 0.6493, + "step": 11130 + }, + { + "epoch": 4.09, + "learning_rate": 3.780509373894588e-08, + "loss": 0.2456, + "step": 11140 + }, + { + "epoch": 4.09, + "learning_rate": 3.7539794835514673e-08, + "loss": 0.1634, + "step": 11150 + }, + { + "epoch": 4.09, + "learning_rate": 3.727449593208348e-08, + "loss": 0.2219, + "step": 11160 + }, + { + "epoch": 4.09, + "learning_rate": 3.700919702865228e-08, + "loss": 0.2627, + "step": 11170 + }, + { + "epoch": 4.09, + "learning_rate": 3.674389812522108e-08, + "loss": 0.6152, + "step": 11180 + }, + { + "epoch": 4.09, + "learning_rate": 3.647859922178988e-08, + "loss": 0.273, + "step": 11190 + }, + { + "epoch": 4.09, + "learning_rate": 3.621330031835868e-08, + "loss": 0.2135, + "step": 11200 + }, + { + "epoch": 4.09, + "learning_rate": 3.594800141492748e-08, + "loss": 0.2077, + "step": 11210 + }, + { + "epoch": 4.09, + "learning_rate": 3.5682702511496284e-08, + "loss": 0.6408, + "step": 11220 + }, + { + "epoch": 4.09, + "learning_rate": 3.5417403608065085e-08, + "loss": 0.1691, + "step": 11230 + }, + { + "epoch": 4.09, + "learning_rate": 3.515210470463389e-08, + "loss": 0.4814, + "step": 11240 + }, + { + "epoch": 4.1, + "learning_rate": 3.488680580120269e-08, + "loss": 0.1419, + "step": 11250 + }, + { + "epoch": 4.1, + "learning_rate": 3.462150689777149e-08, + "loss": 0.3493, + "step": 11260 + }, + { + "epoch": 4.1, + "learning_rate": 3.435620799434029e-08, + "loss": 0.5488, + "step": 11270 + }, + { + "epoch": 4.1, + "learning_rate": 3.4090909090909086e-08, + "loss": 0.4774, + "step": 11280 + }, + { + "epoch": 4.1, + "learning_rate": 3.382561018747789e-08, + "loss": 0.3423, + "step": 11290 + }, + { + "epoch": 4.1, + "learning_rate": 3.356031128404669e-08, + "loss": 0.3831, + "step": 11300 + }, + { + "epoch": 4.1, + "learning_rate": 3.329501238061549e-08, + "loss": 0.3576, + "step": 11310 + }, + { + "epoch": 4.1, + "learning_rate": 3.302971347718429e-08, + "loss": 0.2333, + "step": 11320 + }, + { + "epoch": 4.1, + "learning_rate": 3.2764414573753094e-08, + "loss": 0.2804, + "step": 11330 + }, + { + "epoch": 4.1, + "learning_rate": 3.2499115670321895e-08, + "loss": 0.2403, + "step": 11340 + }, + { + "epoch": 4.1, + "learning_rate": 3.22338167668907e-08, + "loss": 0.2754, + "step": 11350 + }, + { + "epoch": 4.1, + "learning_rate": 3.19685178634595e-08, + "loss": 0.3769, + "step": 11360 + }, + { + "epoch": 4.1, + "learning_rate": 3.170321896002829e-08, + "loss": 0.2209, + "step": 11370 + }, + { + "epoch": 4.11, + "learning_rate": 3.1437920056597095e-08, + "loss": 0.338, + "step": 11380 + }, + { + "epoch": 4.11, + "learning_rate": 3.11726211531659e-08, + "loss": 0.2673, + "step": 11390 + }, + { + "epoch": 4.11, + "learning_rate": 3.09073222497347e-08, + "loss": 0.2406, + "step": 11400 + }, + { + "epoch": 4.11, + "learning_rate": 3.06420233463035e-08, + "loss": 0.3665, + "step": 11410 + }, + { + "epoch": 4.11, + "learning_rate": 3.03767244428723e-08, + "loss": 0.1279, + "step": 11420 + }, + { + "epoch": 4.11, + "learning_rate": 3.01114255394411e-08, + "loss": 0.1948, + "step": 11430 + }, + { + "epoch": 4.11, + "learning_rate": 2.9846126636009904e-08, + "loss": 0.3244, + "step": 11440 + }, + { + "epoch": 4.11, + "learning_rate": 2.9580827732578705e-08, + "loss": 0.4653, + "step": 11450 + }, + { + "epoch": 4.11, + "learning_rate": 2.9315528829147507e-08, + "loss": 0.6083, + "step": 11460 + }, + { + "epoch": 4.11, + "learning_rate": 2.9050229925716305e-08, + "loss": 0.3703, + "step": 11470 + }, + { + "epoch": 4.11, + "learning_rate": 2.8784931022285106e-08, + "loss": 0.4843, + "step": 11480 + }, + { + "epoch": 4.11, + "learning_rate": 2.8519632118853908e-08, + "loss": 0.5002, + "step": 11490 + }, + { + "epoch": 4.11, + "learning_rate": 2.8254333215422706e-08, + "loss": 0.1321, + "step": 11500 + }, + { + "epoch": 4.12, + "learning_rate": 2.7989034311991508e-08, + "loss": 0.2233, + "step": 11510 + }, + { + "epoch": 4.12, + "learning_rate": 2.7723735408560312e-08, + "loss": 0.3006, + "step": 11520 + }, + { + "epoch": 4.12, + "learning_rate": 2.745843650512911e-08, + "loss": 0.5109, + "step": 11530 + }, + { + "epoch": 4.12, + "learning_rate": 2.7193137601697912e-08, + "loss": 0.5902, + "step": 11540 + }, + { + "epoch": 4.12, + "learning_rate": 2.6927838698266714e-08, + "loss": 0.4463, + "step": 11550 + }, + { + "epoch": 4.12, + "learning_rate": 2.6662539794835512e-08, + "loss": 0.1674, + "step": 11560 + }, + { + "epoch": 4.12, + "learning_rate": 2.6397240891404313e-08, + "loss": 0.3116, + "step": 11570 + }, + { + "epoch": 4.12, + "learning_rate": 2.6131941987973115e-08, + "loss": 0.5825, + "step": 11580 + }, + { + "epoch": 4.12, + "learning_rate": 2.5866643084541913e-08, + "loss": 0.2447, + "step": 11590 + }, + { + "epoch": 4.12, + "learning_rate": 2.5601344181110718e-08, + "loss": 0.5195, + "step": 11600 + }, + { + "epoch": 4.12, + "learning_rate": 2.533604527767952e-08, + "loss": 0.2391, + "step": 11610 + }, + { + "epoch": 4.12, + "learning_rate": 2.5070746374248318e-08, + "loss": 0.2846, + "step": 11620 + }, + { + "epoch": 4.13, + "learning_rate": 2.480544747081712e-08, + "loss": 0.1091, + "step": 11630 + }, + { + "epoch": 4.13, + "learning_rate": 2.454014856738592e-08, + "loss": 0.2265, + "step": 11640 + }, + { + "epoch": 4.13, + "learning_rate": 2.4274849663954722e-08, + "loss": 0.1529, + "step": 11650 + }, + { + "epoch": 4.13, + "learning_rate": 2.400955076052352e-08, + "loss": 0.1775, + "step": 11660 + }, + { + "epoch": 4.13, + "learning_rate": 2.3744251857092322e-08, + "loss": 0.2777, + "step": 11670 + }, + { + "epoch": 4.13, + "learning_rate": 2.3478952953661127e-08, + "loss": 0.1498, + "step": 11680 + }, + { + "epoch": 4.13, + "learning_rate": 2.3213654050229925e-08, + "loss": 0.0952, + "step": 11690 + }, + { + "epoch": 4.13, + "learning_rate": 2.2948355146798726e-08, + "loss": 0.2005, + "step": 11700 + }, + { + "epoch": 4.13, + "learning_rate": 2.2683056243367528e-08, + "loss": 0.4254, + "step": 11710 + }, + { + "epoch": 4.13, + "learning_rate": 2.2417757339936326e-08, + "loss": 0.5729, + "step": 11720 + }, + { + "epoch": 4.13, + "learning_rate": 2.2152458436505127e-08, + "loss": 0.2423, + "step": 11730 + }, + { + "epoch": 4.13, + "learning_rate": 2.188715953307393e-08, + "loss": 0.4267, + "step": 11740 + }, + { + "epoch": 4.13, + "learning_rate": 2.1621860629642727e-08, + "loss": 0.2208, + "step": 11750 + }, + { + "epoch": 4.14, + "learning_rate": 2.1356561726211532e-08, + "loss": 0.2322, + "step": 11760 + }, + { + "epoch": 4.14, + "learning_rate": 2.1091262822780334e-08, + "loss": 0.2109, + "step": 11770 + }, + { + "epoch": 4.14, + "learning_rate": 2.0825963919349132e-08, + "loss": 0.3221, + "step": 11780 + }, + { + "epoch": 4.14, + "learning_rate": 2.0560665015917933e-08, + "loss": 0.2768, + "step": 11790 + }, + { + "epoch": 4.14, + "learning_rate": 2.0295366112486735e-08, + "loss": 0.161, + "step": 11800 + }, + { + "epoch": 4.14, + "learning_rate": 2.0030067209055533e-08, + "loss": 0.4017, + "step": 11810 + }, + { + "epoch": 4.14, + "learning_rate": 1.9764768305624334e-08, + "loss": 0.4653, + "step": 11820 + }, + { + "epoch": 4.14, + "learning_rate": 1.9499469402193136e-08, + "loss": 0.3184, + "step": 11830 + }, + { + "epoch": 4.14, + "learning_rate": 1.923417049876194e-08, + "loss": 0.2338, + "step": 11840 + }, + { + "epoch": 4.14, + "learning_rate": 1.896887159533074e-08, + "loss": 0.5184, + "step": 11850 + }, + { + "epoch": 4.14, + "learning_rate": 1.870357269189954e-08, + "loss": 0.3949, + "step": 11860 + }, + { + "epoch": 4.14, + "learning_rate": 1.843827378846834e-08, + "loss": 0.2224, + "step": 11870 + }, + { + "epoch": 4.15, + "learning_rate": 1.817297488503714e-08, + "loss": 0.2273, + "step": 11880 + }, + { + "epoch": 4.15, + "learning_rate": 1.790767598160594e-08, + "loss": 0.4915, + "step": 11890 + }, + { + "epoch": 4.15, + "learning_rate": 1.7642377078174743e-08, + "loss": 0.2633, + "step": 11900 + }, + { + "epoch": 4.15, + "learning_rate": 1.7377078174743545e-08, + "loss": 0.4765, + "step": 11910 + }, + { + "epoch": 4.15, + "learning_rate": 1.7111779271312346e-08, + "loss": 0.3887, + "step": 11920 + }, + { + "epoch": 4.15, + "learning_rate": 1.6846480367881144e-08, + "loss": 0.4244, + "step": 11930 + }, + { + "epoch": 4.15, + "learning_rate": 1.6581181464449946e-08, + "loss": 0.3747, + "step": 11940 + }, + { + "epoch": 4.15, + "learning_rate": 1.6315882561018747e-08, + "loss": 0.2129, + "step": 11950 + }, + { + "epoch": 4.15, + "learning_rate": 1.6050583657587546e-08, + "loss": 0.2404, + "step": 11960 + }, + { + "epoch": 4.15, + "learning_rate": 1.578528475415635e-08, + "loss": 0.3692, + "step": 11970 + }, + { + "epoch": 4.15, + "learning_rate": 1.551998585072515e-08, + "loss": 0.3378, + "step": 11980 + }, + { + "epoch": 4.15, + "learning_rate": 1.525468694729395e-08, + "loss": 0.4417, + "step": 11990 + }, + { + "epoch": 4.15, + "learning_rate": 1.498938804386275e-08, + "loss": 0.3159, + "step": 12000 + }, + { + "epoch": 4.16, + "learning_rate": 1.4724089140431551e-08, + "loss": 0.4654, + "step": 12010 + }, + { + "epoch": 4.16, + "learning_rate": 1.4458790237000355e-08, + "loss": 0.2322, + "step": 12020 + }, + { + "epoch": 4.16, + "learning_rate": 1.4193491333569154e-08, + "loss": 0.2092, + "step": 12030 + }, + { + "epoch": 4.16, + "learning_rate": 1.3928192430137954e-08, + "loss": 0.2385, + "step": 12040 + }, + { + "epoch": 4.16, + "learning_rate": 1.3662893526706756e-08, + "loss": 0.3575, + "step": 12050 + }, + { + "epoch": 4.16, + "learning_rate": 1.3397594623275557e-08, + "loss": 0.2266, + "step": 12060 + }, + { + "epoch": 4.16, + "learning_rate": 1.3132295719844357e-08, + "loss": 0.234, + "step": 12070 + }, + { + "epoch": 4.16, + "learning_rate": 1.2866996816413159e-08, + "loss": 0.2282, + "step": 12080 + }, + { + "epoch": 4.16, + "learning_rate": 1.2601697912981958e-08, + "loss": 0.2577, + "step": 12090 + }, + { + "epoch": 4.16, + "learning_rate": 1.2336399009550762e-08, + "loss": 0.3597, + "step": 12100 + }, + { + "epoch": 4.16, + "learning_rate": 1.2071100106119561e-08, + "loss": 0.4429, + "step": 12110 + }, + { + "epoch": 4.16, + "learning_rate": 1.1805801202688361e-08, + "loss": 0.1702, + "step": 12120 + }, + { + "epoch": 4.17, + "learning_rate": 1.1540502299257163e-08, + "loss": 0.2193, + "step": 12130 + }, + { + "epoch": 4.17, + "learning_rate": 1.1275203395825964e-08, + "loss": 0.4391, + "step": 12140 + }, + { + "epoch": 4.17, + "learning_rate": 1.1009904492394764e-08, + "loss": 0.2284, + "step": 12150 + }, + { + "epoch": 4.17, + "learning_rate": 1.0744605588963566e-08, + "loss": 0.489, + "step": 12160 + }, + { + "epoch": 4.17, + "learning_rate": 1.0479306685532365e-08, + "loss": 0.1444, + "step": 12170 + }, + { + "epoch": 4.17, + "learning_rate": 1.0214007782101165e-08, + "loss": 0.2967, + "step": 12180 + }, + { + "epoch": 4.17, + "learning_rate": 9.948708878669968e-09, + "loss": 0.3594, + "step": 12190 + }, + { + "epoch": 4.17, + "learning_rate": 9.683409975238768e-09, + "loss": 0.5359, + "step": 12200 + }, + { + "epoch": 4.17, + "learning_rate": 9.41811107180757e-09, + "loss": 0.2757, + "step": 12210 + }, + { + "epoch": 4.17, + "learning_rate": 9.152812168376371e-09, + "loss": 0.3935, + "step": 12220 + }, + { + "epoch": 4.17, + "learning_rate": 8.887513264945171e-09, + "loss": 0.14, + "step": 12230 + }, + { + "epoch": 4.17, + "learning_rate": 8.622214361513971e-09, + "loss": 0.4049, + "step": 12240 + }, + { + "epoch": 4.17, + "learning_rate": 8.356915458082773e-09, + "loss": 0.2653, + "step": 12250 + }, + { + "epoch": 4.18, + "learning_rate": 8.091616554651574e-09, + "loss": 0.1909, + "step": 12260 + }, + { + "epoch": 4.18, + "learning_rate": 7.826317651220374e-09, + "loss": 0.2591, + "step": 12270 + }, + { + "epoch": 4.18, + "learning_rate": 7.561018747789175e-09, + "loss": 0.1013, + "step": 12280 + }, + { + "epoch": 4.18, + "learning_rate": 7.295719844357976e-09, + "loss": 0.1527, + "step": 12290 + }, + { + "epoch": 4.18, + "learning_rate": 7.0304209409267776e-09, + "loss": 0.1982, + "step": 12300 + }, + { + "epoch": 4.18, + "learning_rate": 6.765122037495577e-09, + "loss": 0.3196, + "step": 12310 + }, + { + "epoch": 4.18, + "learning_rate": 6.499823134064379e-09, + "loss": 0.2233, + "step": 12320 + }, + { + "epoch": 4.18, + "learning_rate": 6.2345242306331796e-09, + "loss": 0.2557, + "step": 12330 + }, + { + "epoch": 4.18, + "learning_rate": 5.969225327201981e-09, + "loss": 0.1804, + "step": 12340 + }, + { + "epoch": 4.18, + "learning_rate": 5.703926423770781e-09, + "loss": 0.2618, + "step": 12350 + }, + { + "epoch": 4.18, + "learning_rate": 5.4386275203395824e-09, + "loss": 0.4291, + "step": 12360 + }, + { + "epoch": 4.18, + "learning_rate": 5.173328616908383e-09, + "loss": 0.2799, + "step": 12370 + }, + { + "epoch": 4.18, + "learning_rate": 4.908029713477184e-09, + "loss": 0.1811, + "step": 12380 + }, + { + "epoch": 4.19, + "learning_rate": 4.6427308100459845e-09, + "loss": 0.2275, + "step": 12390 + }, + { + "epoch": 4.19, + "learning_rate": 4.377431906614786e-09, + "loss": 0.4325, + "step": 12400 + }, + { + "epoch": 4.19, + "learning_rate": 4.112133003183587e-09, + "loss": 0.22, + "step": 12410 + }, + { + "epoch": 4.19, + "learning_rate": 3.846834099752387e-09, + "loss": 0.22, + "step": 12420 + }, + { + "epoch": 4.19, + "learning_rate": 3.5815351963211884e-09, + "loss": 0.199, + "step": 12430 + }, + { + "epoch": 4.19, + "learning_rate": 3.3162362928899895e-09, + "loss": 0.4157, + "step": 12440 + }, + { + "epoch": 4.19, + "learning_rate": 3.05093738945879e-09, + "loss": 0.4612, + "step": 12450 + }, + { + "epoch": 4.19, + "learning_rate": 2.7856384860275913e-09, + "loss": 0.2144, + "step": 12460 + }, + { + "epoch": 4.19, + "learning_rate": 2.520339582596392e-09, + "loss": 0.3051, + "step": 12470 + }, + { + "epoch": 4.19, + "learning_rate": 2.2550406791651926e-09, + "loss": 0.4694, + "step": 12480 + }, + { + "epoch": 4.19, + "learning_rate": 1.9897417757339937e-09, + "loss": 0.5988, + "step": 12490 + }, + { + "epoch": 4.19, + "learning_rate": 1.7244428723027944e-09, + "loss": 0.1659, + "step": 12500 + }, + { + "epoch": 4.2, + "learning_rate": 1.4591439688715953e-09, + "loss": 0.4332, + "step": 12510 + }, + { + "epoch": 4.2, + "learning_rate": 1.1938450654403961e-09, + "loss": 0.2026, + "step": 12520 + }, + { + "epoch": 4.2, + "learning_rate": 9.28546162009197e-10, + "loss": 0.3017, + "step": 12530 + }, + { + "epoch": 4.2, + "learning_rate": 6.632472585779978e-10, + "loss": 0.2656, + "step": 12540 + }, + { + "epoch": 4.2, + "learning_rate": 3.979483551467987e-10, + "loss": 0.5015, + "step": 12550 + }, + { + "epoch": 4.2, + "learning_rate": 1.3264945171559958e-10, + "loss": 0.4006, + "step": 12560 + }, + { + "epoch": 4.2, + "eval_accuracy": 0.8251666013328106, + "eval_loss": 0.44296565651893616, + "eval_runtime": 623.4351, + "eval_samples_per_second": 4.092, + "eval_steps_per_second": 1.023, + "step": 12565 + }, + { + "epoch": 4.2, + "step": 12565, + "total_flos": 2.2067541850155437e+20, + "train_loss": 0.39015588782197014, + "train_runtime": 24412.2348, + "train_samples_per_second": 2.059, + "train_steps_per_second": 0.515 + }, + { + "epoch": 4.2, + "eval_accuracy": 0.8251666013328106, + "eval_loss": 0.44296565651893616, + "eval_runtime": 654.7979, + "eval_samples_per_second": 3.896, + "eval_steps_per_second": 0.974, + "step": 12565 + }, + { + "epoch": 4.2, + "eval_accuracy": 0.8251666013328106, + "eval_loss": 0.44296565651893616, + "eval_runtime": 750.9501, + "eval_samples_per_second": 3.397, + "eval_steps_per_second": 0.85, + "step": 12565 + } + ], + "logging_steps": 10, + "max_steps": 12565, + "num_train_epochs": 9223372036854775807, + "save_steps": 500, + "total_flos": 2.2067541850155437e+20, + "trial_name": null, + "trial_params": null +}