{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.93374682830561, "eval_steps": 500, "global_step": 17500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.93752581844851e-06, "loss": 3.0868, "step": 10 }, { "epoch": 0.01, "learning_rate": 6.42386919416686e-06, "loss": 3.9106, "step": 20 }, { "epoch": 0.01, "learning_rate": 7.293324332157391e-06, "loss": 3.2471, "step": 30 }, { "epoch": 0.01, "learning_rate": 7.910212569885209e-06, "loss": 2.2106, "step": 40 }, { "epoch": 0.01, "learning_rate": 8.388708261178669e-06, "loss": 1.3444, "step": 50 }, { "epoch": 0.02, "learning_rate": 8.77966770787574e-06, "loss": 1.1874, "step": 60 }, { "epoch": 0.02, "learning_rate": 9.110219210139135e-06, "loss": 0.7707, "step": 70 }, { "epoch": 0.02, "learning_rate": 9.396555945603558e-06, "loss": 1.07, "step": 80 }, { "epoch": 0.03, "learning_rate": 9.649122845866272e-06, "loss": 0.8138, "step": 90 }, { "epoch": 0.03, "learning_rate": 9.87505163689702e-06, "loss": 0.7951, "step": 100 }, { "epoch": 0.03, "learning_rate": 9.997152349311819e-06, "loss": 1.8935, "step": 110 }, { "epoch": 0.03, "learning_rate": 9.98766018035121e-06, "loss": 1.1202, "step": 120 }, { "epoch": 0.04, "learning_rate": 9.978168011390604e-06, "loss": 0.8436, "step": 130 }, { "epoch": 0.04, "learning_rate": 9.968675842429996e-06, "loss": 1.6259, "step": 140 }, { "epoch": 0.04, "learning_rate": 9.959183673469387e-06, "loss": 0.688, "step": 150 }, { "epoch": 0.05, "learning_rate": 9.94969150450878e-06, "loss": 1.2987, "step": 160 }, { "epoch": 0.05, "learning_rate": 9.940199335548174e-06, "loss": 1.1667, "step": 170 }, { "epoch": 0.05, "learning_rate": 9.930707166587566e-06, "loss": 0.9948, "step": 180 }, { "epoch": 0.05, "learning_rate": 9.92121499762696e-06, "loss": 1.1458, "step": 190 }, { "epoch": 0.06, "learning_rate": 9.911722828666351e-06, "loss": 1.2454, "step": 200 }, { "epoch": 0.06, "learning_rate": 9.902230659705745e-06, "loss": 0.8821, "step": 210 }, { "epoch": 0.06, "learning_rate": 9.892738490745136e-06, "loss": 0.7736, "step": 220 }, { "epoch": 0.06, "learning_rate": 9.883246321784528e-06, "loss": 1.0794, "step": 230 }, { "epoch": 0.07, "learning_rate": 9.873754152823922e-06, "loss": 1.4458, "step": 240 }, { "epoch": 0.07, "learning_rate": 9.864261983863313e-06, "loss": 0.9957, "step": 250 }, { "epoch": 0.07, "learning_rate": 9.854769814902707e-06, "loss": 0.6101, "step": 260 }, { "epoch": 0.08, "learning_rate": 9.845277645942099e-06, "loss": 0.6848, "step": 270 }, { "epoch": 0.08, "learning_rate": 9.83578547698149e-06, "loss": 1.6407, "step": 280 }, { "epoch": 0.08, "learning_rate": 9.826293308020884e-06, "loss": 0.8201, "step": 290 }, { "epoch": 0.08, "learning_rate": 9.816801139060275e-06, "loss": 0.9695, "step": 300 }, { "epoch": 0.09, "learning_rate": 9.807308970099669e-06, "loss": 1.131, "step": 310 }, { "epoch": 0.09, "learning_rate": 9.79781680113906e-06, "loss": 0.8368, "step": 320 }, { "epoch": 0.09, "learning_rate": 9.788324632178452e-06, "loss": 1.0931, "step": 330 }, { "epoch": 0.1, "learning_rate": 9.778832463217846e-06, "loss": 0.617, "step": 340 }, { "epoch": 0.1, "learning_rate": 9.76934029425724e-06, "loss": 0.3638, "step": 350 }, { "epoch": 0.1, "learning_rate": 9.759848125296631e-06, "loss": 1.0894, "step": 360 }, { "epoch": 0.1, "learning_rate": 9.750355956336024e-06, "loss": 1.2748, "step": 370 }, { "epoch": 0.11, "learning_rate": 9.740863787375416e-06, "loss": 1.4306, "step": 380 }, { "epoch": 0.11, "learning_rate": 9.731371618414808e-06, "loss": 0.9422, "step": 390 }, { "epoch": 0.11, "learning_rate": 9.721879449454201e-06, "loss": 0.6539, "step": 400 }, { "epoch": 0.12, "learning_rate": 9.712387280493593e-06, "loss": 0.862, "step": 410 }, { "epoch": 0.12, "learning_rate": 9.702895111532987e-06, "loss": 1.2179, "step": 420 }, { "epoch": 0.12, "learning_rate": 9.693402942572378e-06, "loss": 0.9396, "step": 430 }, { "epoch": 0.12, "learning_rate": 9.683910773611772e-06, "loss": 0.897, "step": 440 }, { "epoch": 0.13, "learning_rate": 9.674418604651164e-06, "loss": 0.8183, "step": 450 }, { "epoch": 0.13, "learning_rate": 9.664926435690555e-06, "loss": 0.5799, "step": 460 }, { "epoch": 0.13, "learning_rate": 9.655434266729949e-06, "loss": 0.6694, "step": 470 }, { "epoch": 0.14, "learning_rate": 9.64594209776934e-06, "loss": 0.6407, "step": 480 }, { "epoch": 0.14, "learning_rate": 9.636449928808734e-06, "loss": 0.4138, "step": 490 }, { "epoch": 0.14, "learning_rate": 9.626957759848126e-06, "loss": 1.255, "step": 500 }, { "epoch": 0.14, "eval_loss": 0.5974699854850769, "eval_runtime": 216.3078, "eval_samples_per_second": 9.343, "eval_steps_per_second": 2.339, "step": 500 }, { "epoch": 0.14, "learning_rate": 9.617465590887517e-06, "loss": 1.0736, "step": 510 }, { "epoch": 0.15, "learning_rate": 9.607973421926911e-06, "loss": 0.7988, "step": 520 }, { "epoch": 0.15, "learning_rate": 9.598481252966304e-06, "loss": 0.7407, "step": 530 }, { "epoch": 0.15, "learning_rate": 9.588989084005696e-06, "loss": 0.6603, "step": 540 }, { "epoch": 0.16, "learning_rate": 9.57949691504509e-06, "loss": 0.5824, "step": 550 }, { "epoch": 0.16, "learning_rate": 9.570004746084481e-06, "loss": 0.8746, "step": 560 }, { "epoch": 0.16, "learning_rate": 9.560512577123873e-06, "loss": 0.3868, "step": 570 }, { "epoch": 0.16, "learning_rate": 9.551020408163266e-06, "loss": 0.5346, "step": 580 }, { "epoch": 0.17, "learning_rate": 9.541528239202658e-06, "loss": 0.559, "step": 590 }, { "epoch": 0.17, "learning_rate": 9.532036070242052e-06, "loss": 0.8601, "step": 600 }, { "epoch": 0.17, "learning_rate": 9.522543901281443e-06, "loss": 0.5392, "step": 610 }, { "epoch": 0.17, "learning_rate": 9.513051732320835e-06, "loss": 0.6593, "step": 620 }, { "epoch": 0.18, "learning_rate": 9.503559563360229e-06, "loss": 0.8227, "step": 630 }, { "epoch": 0.18, "learning_rate": 9.49406739439962e-06, "loss": 0.6741, "step": 640 }, { "epoch": 0.18, "learning_rate": 9.484575225439014e-06, "loss": 0.3784, "step": 650 }, { "epoch": 0.19, "learning_rate": 9.475083056478406e-06, "loss": 0.4041, "step": 660 }, { "epoch": 0.19, "learning_rate": 9.465590887517799e-06, "loss": 0.8986, "step": 670 }, { "epoch": 0.19, "learning_rate": 9.45609871855719e-06, "loss": 0.6402, "step": 680 }, { "epoch": 0.19, "learning_rate": 9.446606549596583e-06, "loss": 0.5837, "step": 690 }, { "epoch": 0.2, "learning_rate": 9.437114380635976e-06, "loss": 0.7486, "step": 700 }, { "epoch": 0.2, "learning_rate": 9.42762221167537e-06, "loss": 0.7009, "step": 710 }, { "epoch": 0.2, "learning_rate": 9.418130042714761e-06, "loss": 0.4641, "step": 720 }, { "epoch": 0.21, "learning_rate": 9.408637873754155e-06, "loss": 0.0882, "step": 730 }, { "epoch": 0.21, "learning_rate": 9.399145704793546e-06, "loss": 0.4681, "step": 740 }, { "epoch": 0.21, "learning_rate": 9.389653535832938e-06, "loss": 0.5282, "step": 750 }, { "epoch": 0.21, "learning_rate": 9.380161366872332e-06, "loss": 0.7416, "step": 760 }, { "epoch": 0.22, "learning_rate": 9.370669197911723e-06, "loss": 0.6423, "step": 770 }, { "epoch": 0.22, "learning_rate": 9.361177028951117e-06, "loss": 0.6967, "step": 780 }, { "epoch": 0.22, "learning_rate": 9.351684859990508e-06, "loss": 0.4016, "step": 790 }, { "epoch": 0.23, "learning_rate": 9.3421926910299e-06, "loss": 0.698, "step": 800 }, { "epoch": 0.23, "learning_rate": 9.332700522069294e-06, "loss": 0.4438, "step": 810 }, { "epoch": 0.23, "learning_rate": 9.323208353108685e-06, "loss": 0.5083, "step": 820 }, { "epoch": 0.23, "learning_rate": 9.313716184148079e-06, "loss": 0.8066, "step": 830 }, { "epoch": 0.24, "learning_rate": 9.30422401518747e-06, "loss": 0.5104, "step": 840 }, { "epoch": 0.24, "learning_rate": 9.294731846226862e-06, "loss": 0.383, "step": 850 }, { "epoch": 0.24, "learning_rate": 9.285239677266256e-06, "loss": 0.4484, "step": 860 }, { "epoch": 0.25, "learning_rate": 9.275747508305648e-06, "loss": 0.5572, "step": 870 }, { "epoch": 0.25, "learning_rate": 9.266255339345041e-06, "loss": 0.6063, "step": 880 }, { "epoch": 0.25, "learning_rate": 9.256763170384434e-06, "loss": 0.6665, "step": 890 }, { "epoch": 0.25, "learning_rate": 9.247271001423826e-06, "loss": 0.6123, "step": 900 }, { "epoch": 0.26, "learning_rate": 9.23777883246322e-06, "loss": 0.5005, "step": 910 }, { "epoch": 0.26, "learning_rate": 9.228286663502611e-06, "loss": 0.9236, "step": 920 }, { "epoch": 0.26, "learning_rate": 9.218794494542003e-06, "loss": 0.4072, "step": 930 }, { "epoch": 0.27, "learning_rate": 9.209302325581397e-06, "loss": 0.6445, "step": 940 }, { "epoch": 0.27, "learning_rate": 9.199810156620788e-06, "loss": 0.9185, "step": 950 }, { "epoch": 0.27, "learning_rate": 9.190317987660182e-06, "loss": 0.261, "step": 960 }, { "epoch": 0.27, "learning_rate": 9.180825818699574e-06, "loss": 0.7811, "step": 970 }, { "epoch": 0.28, "learning_rate": 9.171333649738965e-06, "loss": 0.7295, "step": 980 }, { "epoch": 0.28, "learning_rate": 9.161841480778359e-06, "loss": 0.3809, "step": 990 }, { "epoch": 0.28, "learning_rate": 9.15234931181775e-06, "loss": 0.6035, "step": 1000 }, { "epoch": 0.28, "eval_loss": 0.48104238510131836, "eval_runtime": 210.5024, "eval_samples_per_second": 9.601, "eval_steps_per_second": 2.404, "step": 1000 }, { "epoch": 0.28, "learning_rate": 9.142857142857144e-06, "loss": 0.9329, "step": 1010 }, { "epoch": 0.29, "learning_rate": 9.133364973896536e-06, "loss": 0.65, "step": 1020 }, { "epoch": 0.29, "learning_rate": 9.123872804935927e-06, "loss": 0.3792, "step": 1030 }, { "epoch": 0.29, "learning_rate": 9.11438063597532e-06, "loss": 0.1691, "step": 1040 }, { "epoch": 0.3, "learning_rate": 9.104888467014713e-06, "loss": 0.6827, "step": 1050 }, { "epoch": 0.3, "learning_rate": 9.095396298054106e-06, "loss": 0.6459, "step": 1060 }, { "epoch": 0.3, "learning_rate": 9.0859041290935e-06, "loss": 0.8112, "step": 1070 }, { "epoch": 0.3, "learning_rate": 9.076411960132891e-06, "loss": 0.6337, "step": 1080 }, { "epoch": 0.31, "learning_rate": 9.066919791172285e-06, "loss": 0.4256, "step": 1090 }, { "epoch": 0.31, "learning_rate": 9.057427622211676e-06, "loss": 0.2803, "step": 1100 }, { "epoch": 0.31, "learning_rate": 9.047935453251068e-06, "loss": 0.5562, "step": 1110 }, { "epoch": 0.32, "learning_rate": 9.038443284290462e-06, "loss": 0.3124, "step": 1120 }, { "epoch": 0.32, "learning_rate": 9.028951115329853e-06, "loss": 0.3223, "step": 1130 }, { "epoch": 0.32, "learning_rate": 9.019458946369247e-06, "loss": 0.5686, "step": 1140 }, { "epoch": 0.32, "learning_rate": 9.009966777408639e-06, "loss": 0.2857, "step": 1150 }, { "epoch": 0.33, "learning_rate": 9.00047460844803e-06, "loss": 0.4192, "step": 1160 }, { "epoch": 0.33, "learning_rate": 8.990982439487424e-06, "loss": 0.2262, "step": 1170 }, { "epoch": 0.33, "learning_rate": 8.981490270526815e-06, "loss": 0.6804, "step": 1180 }, { "epoch": 0.34, "learning_rate": 8.971998101566209e-06, "loss": 0.3721, "step": 1190 }, { "epoch": 0.34, "learning_rate": 8.9625059326056e-06, "loss": 0.47, "step": 1200 }, { "epoch": 0.34, "learning_rate": 8.953013763644992e-06, "loss": 0.7316, "step": 1210 }, { "epoch": 0.34, "learning_rate": 8.943521594684386e-06, "loss": 0.7199, "step": 1220 }, { "epoch": 0.35, "learning_rate": 8.934029425723778e-06, "loss": 0.6315, "step": 1230 }, { "epoch": 0.35, "learning_rate": 8.924537256763171e-06, "loss": 0.4095, "step": 1240 }, { "epoch": 0.35, "learning_rate": 8.915045087802565e-06, "loss": 1.0135, "step": 1250 }, { "epoch": 0.36, "learning_rate": 8.905552918841956e-06, "loss": 0.6973, "step": 1260 }, { "epoch": 0.36, "learning_rate": 8.89606074988135e-06, "loss": 0.3849, "step": 1270 }, { "epoch": 0.36, "learning_rate": 8.886568580920741e-06, "loss": 0.4011, "step": 1280 }, { "epoch": 0.36, "learning_rate": 8.877076411960133e-06, "loss": 0.2936, "step": 1290 }, { "epoch": 0.37, "learning_rate": 8.867584242999527e-06, "loss": 0.5101, "step": 1300 }, { "epoch": 0.37, "learning_rate": 8.858092074038918e-06, "loss": 0.4414, "step": 1310 }, { "epoch": 0.37, "learning_rate": 8.848599905078312e-06, "loss": 0.5243, "step": 1320 }, { "epoch": 0.37, "learning_rate": 8.839107736117704e-06, "loss": 0.3593, "step": 1330 }, { "epoch": 0.38, "learning_rate": 8.829615567157095e-06, "loss": 1.0084, "step": 1340 }, { "epoch": 0.38, "learning_rate": 8.820123398196489e-06, "loss": 0.612, "step": 1350 }, { "epoch": 0.38, "learning_rate": 8.81063122923588e-06, "loss": 0.6974, "step": 1360 }, { "epoch": 0.39, "learning_rate": 8.801139060275274e-06, "loss": 0.0845, "step": 1370 }, { "epoch": 0.39, "learning_rate": 8.791646891314666e-06, "loss": 0.4725, "step": 1380 }, { "epoch": 0.39, "learning_rate": 8.782154722354057e-06, "loss": 0.5528, "step": 1390 }, { "epoch": 0.39, "learning_rate": 8.772662553393451e-06, "loss": 0.4501, "step": 1400 }, { "epoch": 0.4, "learning_rate": 8.763170384432843e-06, "loss": 0.5435, "step": 1410 }, { "epoch": 0.4, "learning_rate": 8.753678215472236e-06, "loss": 0.3251, "step": 1420 }, { "epoch": 0.4, "learning_rate": 8.74418604651163e-06, "loss": 0.5594, "step": 1430 }, { "epoch": 0.41, "learning_rate": 8.734693877551021e-06, "loss": 0.7562, "step": 1440 }, { "epoch": 0.41, "learning_rate": 8.725201708590415e-06, "loss": 0.463, "step": 1450 }, { "epoch": 0.41, "learning_rate": 8.715709539629807e-06, "loss": 0.5296, "step": 1460 }, { "epoch": 0.41, "learning_rate": 8.706217370669198e-06, "loss": 0.6942, "step": 1470 }, { "epoch": 0.42, "learning_rate": 8.696725201708592e-06, "loss": 0.6386, "step": 1480 }, { "epoch": 0.42, "learning_rate": 8.687233032747983e-06, "loss": 0.4996, "step": 1490 }, { "epoch": 0.42, "learning_rate": 8.677740863787377e-06, "loss": 0.2947, "step": 1500 }, { "epoch": 0.42, "eval_loss": 0.43847087025642395, "eval_runtime": 211.0507, "eval_samples_per_second": 9.576, "eval_steps_per_second": 2.398, "step": 1500 }, { "epoch": 0.43, "learning_rate": 8.668248694826769e-06, "loss": 0.5069, "step": 1510 }, { "epoch": 0.43, "learning_rate": 8.65875652586616e-06, "loss": 0.5187, "step": 1520 }, { "epoch": 0.43, "learning_rate": 8.649264356905554e-06, "loss": 0.7606, "step": 1530 }, { "epoch": 0.43, "learning_rate": 8.639772187944946e-06, "loss": 0.3025, "step": 1540 }, { "epoch": 0.44, "learning_rate": 8.630280018984339e-06, "loss": 0.5177, "step": 1550 }, { "epoch": 0.44, "learning_rate": 8.62078785002373e-06, "loss": 0.7777, "step": 1560 }, { "epoch": 0.44, "learning_rate": 8.611295681063123e-06, "loss": 0.4197, "step": 1570 }, { "epoch": 0.45, "learning_rate": 8.601803512102516e-06, "loss": 0.2935, "step": 1580 }, { "epoch": 0.45, "learning_rate": 8.59231134314191e-06, "loss": 0.3054, "step": 1590 }, { "epoch": 0.45, "learning_rate": 8.582819174181301e-06, "loss": 0.3883, "step": 1600 }, { "epoch": 0.45, "learning_rate": 8.573327005220695e-06, "loss": 0.5324, "step": 1610 }, { "epoch": 0.46, "learning_rate": 8.563834836260086e-06, "loss": 0.5354, "step": 1620 }, { "epoch": 0.46, "learning_rate": 8.554342667299478e-06, "loss": 0.6134, "step": 1630 }, { "epoch": 0.46, "learning_rate": 8.544850498338872e-06, "loss": 0.289, "step": 1640 }, { "epoch": 0.47, "learning_rate": 8.535358329378263e-06, "loss": 0.2473, "step": 1650 }, { "epoch": 0.47, "learning_rate": 8.525866160417657e-06, "loss": 0.3148, "step": 1660 }, { "epoch": 0.47, "learning_rate": 8.516373991457048e-06, "loss": 0.5027, "step": 1670 }, { "epoch": 0.47, "learning_rate": 8.506881822496442e-06, "loss": 0.24, "step": 1680 }, { "epoch": 0.48, "learning_rate": 8.497389653535834e-06, "loss": 0.5346, "step": 1690 }, { "epoch": 0.48, "learning_rate": 8.487897484575225e-06, "loss": 0.5567, "step": 1700 }, { "epoch": 0.48, "learning_rate": 8.478405315614619e-06, "loss": 0.3816, "step": 1710 }, { "epoch": 0.48, "learning_rate": 8.46891314665401e-06, "loss": 0.499, "step": 1720 }, { "epoch": 0.49, "learning_rate": 8.459420977693404e-06, "loss": 0.6085, "step": 1730 }, { "epoch": 0.49, "learning_rate": 8.449928808732796e-06, "loss": 0.5301, "step": 1740 }, { "epoch": 0.49, "learning_rate": 8.440436639772188e-06, "loss": 0.5552, "step": 1750 }, { "epoch": 0.5, "learning_rate": 8.430944470811581e-06, "loss": 0.3411, "step": 1760 }, { "epoch": 0.5, "learning_rate": 8.421452301850974e-06, "loss": 0.9363, "step": 1770 }, { "epoch": 0.5, "learning_rate": 8.411960132890366e-06, "loss": 0.6064, "step": 1780 }, { "epoch": 0.5, "learning_rate": 8.40246796392976e-06, "loss": 0.5264, "step": 1790 }, { "epoch": 0.51, "learning_rate": 8.392975794969151e-06, "loss": 0.4054, "step": 1800 }, { "epoch": 0.51, "learning_rate": 8.383483626008543e-06, "loss": 0.4287, "step": 1810 }, { "epoch": 0.51, "learning_rate": 8.373991457047937e-06, "loss": 0.1082, "step": 1820 }, { "epoch": 0.52, "learning_rate": 8.364499288087328e-06, "loss": 0.6657, "step": 1830 }, { "epoch": 0.52, "learning_rate": 8.355007119126722e-06, "loss": 0.5962, "step": 1840 }, { "epoch": 0.52, "learning_rate": 8.345514950166114e-06, "loss": 0.4596, "step": 1850 }, { "epoch": 0.52, "learning_rate": 8.336022781205505e-06, "loss": 0.3728, "step": 1860 }, { "epoch": 0.53, "learning_rate": 8.326530612244899e-06, "loss": 0.5586, "step": 1870 }, { "epoch": 0.53, "learning_rate": 8.31703844328429e-06, "loss": 0.4651, "step": 1880 }, { "epoch": 0.53, "learning_rate": 8.307546274323684e-06, "loss": 0.5714, "step": 1890 }, { "epoch": 0.54, "learning_rate": 8.298054105363076e-06, "loss": 0.525, "step": 1900 }, { "epoch": 0.54, "learning_rate": 8.288561936402469e-06, "loss": 0.3305, "step": 1910 }, { "epoch": 0.54, "learning_rate": 8.279069767441861e-06, "loss": 0.5429, "step": 1920 }, { "epoch": 0.54, "learning_rate": 8.269577598481253e-06, "loss": 0.5503, "step": 1930 }, { "epoch": 0.55, "learning_rate": 8.260085429520646e-06, "loss": 0.303, "step": 1940 }, { "epoch": 0.55, "learning_rate": 8.25059326056004e-06, "loss": 0.4068, "step": 1950 }, { "epoch": 0.55, "learning_rate": 8.241101091599431e-06, "loss": 0.1963, "step": 1960 }, { "epoch": 0.56, "learning_rate": 8.231608922638825e-06, "loss": 0.2071, "step": 1970 }, { "epoch": 0.56, "learning_rate": 8.222116753678216e-06, "loss": 0.3768, "step": 1980 }, { "epoch": 0.56, "learning_rate": 8.212624584717608e-06, "loss": 0.3167, "step": 1990 }, { "epoch": 0.56, "learning_rate": 8.203132415757002e-06, "loss": 0.5134, "step": 2000 }, { "epoch": 0.56, "eval_loss": 0.4093641936779022, "eval_runtime": 211.8311, "eval_samples_per_second": 9.541, "eval_steps_per_second": 2.389, "step": 2000 }, { "epoch": 0.57, "learning_rate": 8.193640246796393e-06, "loss": 0.5266, "step": 2010 }, { "epoch": 0.57, "learning_rate": 8.184148077835787e-06, "loss": 0.3986, "step": 2020 }, { "epoch": 0.57, "learning_rate": 8.174655908875179e-06, "loss": 0.4266, "step": 2030 }, { "epoch": 0.58, "learning_rate": 8.16516373991457e-06, "loss": 0.3662, "step": 2040 }, { "epoch": 0.58, "learning_rate": 8.155671570953964e-06, "loss": 0.6357, "step": 2050 }, { "epoch": 0.58, "learning_rate": 8.146179401993356e-06, "loss": 0.6372, "step": 2060 }, { "epoch": 0.58, "learning_rate": 8.136687233032749e-06, "loss": 0.5443, "step": 2070 }, { "epoch": 0.59, "learning_rate": 8.12719506407214e-06, "loss": 0.2663, "step": 2080 }, { "epoch": 0.59, "learning_rate": 8.117702895111534e-06, "loss": 0.5385, "step": 2090 }, { "epoch": 0.59, "learning_rate": 8.108210726150926e-06, "loss": 0.3451, "step": 2100 }, { "epoch": 0.59, "learning_rate": 8.098718557190318e-06, "loss": 0.5365, "step": 2110 }, { "epoch": 0.6, "learning_rate": 8.089226388229711e-06, "loss": 0.4255, "step": 2120 }, { "epoch": 0.6, "learning_rate": 8.079734219269105e-06, "loss": 0.4673, "step": 2130 }, { "epoch": 0.6, "learning_rate": 8.070242050308496e-06, "loss": 0.7596, "step": 2140 }, { "epoch": 0.61, "learning_rate": 8.06074988134789e-06, "loss": 0.2713, "step": 2150 }, { "epoch": 0.61, "learning_rate": 8.051257712387281e-06, "loss": 0.3279, "step": 2160 }, { "epoch": 0.61, "learning_rate": 8.041765543426673e-06, "loss": 0.7999, "step": 2170 }, { "epoch": 0.61, "learning_rate": 8.032273374466067e-06, "loss": 0.4163, "step": 2180 }, { "epoch": 0.62, "learning_rate": 8.022781205505458e-06, "loss": 0.2294, "step": 2190 }, { "epoch": 0.62, "learning_rate": 8.013289036544852e-06, "loss": 0.6922, "step": 2200 }, { "epoch": 0.62, "learning_rate": 8.003796867584244e-06, "loss": 0.656, "step": 2210 }, { "epoch": 0.63, "learning_rate": 7.994304698623635e-06, "loss": 0.407, "step": 2220 }, { "epoch": 0.63, "learning_rate": 7.984812529663029e-06, "loss": 0.4617, "step": 2230 }, { "epoch": 0.63, "learning_rate": 7.97532036070242e-06, "loss": 0.4542, "step": 2240 }, { "epoch": 0.63, "learning_rate": 7.965828191741814e-06, "loss": 0.5353, "step": 2250 }, { "epoch": 0.64, "learning_rate": 7.956336022781206e-06, "loss": 0.3015, "step": 2260 }, { "epoch": 0.64, "learning_rate": 7.946843853820598e-06, "loss": 0.5876, "step": 2270 }, { "epoch": 0.64, "learning_rate": 7.937351684859991e-06, "loss": 0.44, "step": 2280 }, { "epoch": 0.65, "learning_rate": 7.927859515899383e-06, "loss": 0.3684, "step": 2290 }, { "epoch": 0.65, "learning_rate": 7.918367346938776e-06, "loss": 0.3488, "step": 2300 }, { "epoch": 0.65, "learning_rate": 7.90887517797817e-06, "loss": 0.4936, "step": 2310 }, { "epoch": 0.65, "learning_rate": 7.899383009017561e-06, "loss": 0.4513, "step": 2320 }, { "epoch": 0.66, "learning_rate": 7.889890840056955e-06, "loss": 0.4719, "step": 2330 }, { "epoch": 0.66, "learning_rate": 7.880398671096347e-06, "loss": 0.4911, "step": 2340 }, { "epoch": 0.66, "learning_rate": 7.870906502135738e-06, "loss": 0.5195, "step": 2350 }, { "epoch": 0.67, "learning_rate": 7.861414333175132e-06, "loss": 0.2038, "step": 2360 }, { "epoch": 0.67, "learning_rate": 7.851922164214523e-06, "loss": 0.3553, "step": 2370 }, { "epoch": 0.67, "learning_rate": 7.842429995253917e-06, "loss": 0.5719, "step": 2380 }, { "epoch": 0.67, "learning_rate": 7.832937826293309e-06, "loss": 0.7717, "step": 2390 }, { "epoch": 0.68, "learning_rate": 7.8234456573327e-06, "loss": 0.1035, "step": 2400 }, { "epoch": 0.68, "learning_rate": 7.813953488372094e-06, "loss": 0.5225, "step": 2410 }, { "epoch": 0.68, "learning_rate": 7.804461319411486e-06, "loss": 0.4826, "step": 2420 }, { "epoch": 0.69, "learning_rate": 7.794969150450879e-06, "loss": 0.573, "step": 2430 }, { "epoch": 0.69, "learning_rate": 7.78547698149027e-06, "loss": 0.2182, "step": 2440 }, { "epoch": 0.69, "learning_rate": 7.775984812529663e-06, "loss": 0.6481, "step": 2450 }, { "epoch": 0.69, "learning_rate": 7.766492643569056e-06, "loss": 0.3904, "step": 2460 }, { "epoch": 0.7, "learning_rate": 7.757000474608448e-06, "loss": 0.7893, "step": 2470 }, { "epoch": 0.7, "learning_rate": 7.747508305647841e-06, "loss": 0.4534, "step": 2480 }, { "epoch": 0.7, "learning_rate": 7.738016136687235e-06, "loss": 0.6228, "step": 2490 }, { "epoch": 0.7, "learning_rate": 7.728523967726626e-06, "loss": 0.345, "step": 2500 }, { "epoch": 0.7, "eval_loss": 0.3839055895805359, "eval_runtime": 210.4029, "eval_samples_per_second": 9.605, "eval_steps_per_second": 2.405, "step": 2500 }, { "epoch": 0.71, "learning_rate": 7.71903179876602e-06, "loss": 0.5411, "step": 2510 }, { "epoch": 0.71, "learning_rate": 7.709539629805412e-06, "loss": 0.5758, "step": 2520 }, { "epoch": 0.71, "learning_rate": 7.700047460844803e-06, "loss": 0.3009, "step": 2530 }, { "epoch": 0.72, "learning_rate": 7.690555291884197e-06, "loss": 0.5573, "step": 2540 }, { "epoch": 0.72, "learning_rate": 7.681063122923589e-06, "loss": 0.627, "step": 2550 }, { "epoch": 0.72, "learning_rate": 7.671570953962982e-06, "loss": 0.5643, "step": 2560 }, { "epoch": 0.72, "learning_rate": 7.662078785002374e-06, "loss": 0.6044, "step": 2570 }, { "epoch": 0.73, "learning_rate": 7.652586616041765e-06, "loss": 0.3706, "step": 2580 }, { "epoch": 0.73, "learning_rate": 7.643094447081159e-06, "loss": 0.4161, "step": 2590 }, { "epoch": 0.73, "learning_rate": 7.63360227812055e-06, "loss": 0.4195, "step": 2600 }, { "epoch": 0.74, "learning_rate": 7.624110109159943e-06, "loss": 0.619, "step": 2610 }, { "epoch": 0.74, "learning_rate": 7.614617940199336e-06, "loss": 0.5089, "step": 2620 }, { "epoch": 0.74, "learning_rate": 7.6051257712387284e-06, "loss": 0.288, "step": 2630 }, { "epoch": 0.74, "learning_rate": 7.595633602278121e-06, "loss": 0.3765, "step": 2640 }, { "epoch": 0.75, "learning_rate": 7.586141433317513e-06, "loss": 0.6771, "step": 2650 }, { "epoch": 0.75, "learning_rate": 7.576649264356905e-06, "loss": 0.3748, "step": 2660 }, { "epoch": 0.75, "learning_rate": 7.567157095396299e-06, "loss": 0.5745, "step": 2670 }, { "epoch": 0.76, "learning_rate": 7.557664926435691e-06, "loss": 0.6296, "step": 2680 }, { "epoch": 0.76, "learning_rate": 7.548172757475084e-06, "loss": 0.3589, "step": 2690 }, { "epoch": 0.76, "learning_rate": 7.538680588514477e-06, "loss": 0.6862, "step": 2700 }, { "epoch": 0.76, "learning_rate": 7.529188419553869e-06, "loss": 0.5385, "step": 2710 }, { "epoch": 0.77, "learning_rate": 7.519696250593261e-06, "loss": 0.2568, "step": 2720 }, { "epoch": 0.77, "learning_rate": 7.5102040816326536e-06, "loss": 0.3519, "step": 2730 }, { "epoch": 0.77, "learning_rate": 7.500711912672046e-06, "loss": 0.3583, "step": 2740 }, { "epoch": 0.78, "learning_rate": 7.491219743711439e-06, "loss": 0.4923, "step": 2750 }, { "epoch": 0.78, "learning_rate": 7.481727574750831e-06, "loss": 0.3252, "step": 2760 }, { "epoch": 0.78, "learning_rate": 7.472235405790224e-06, "loss": 0.4134, "step": 2770 }, { "epoch": 0.78, "learning_rate": 7.462743236829616e-06, "loss": 0.4374, "step": 2780 }, { "epoch": 0.79, "learning_rate": 7.453251067869008e-06, "loss": 0.2847, "step": 2790 }, { "epoch": 0.79, "learning_rate": 7.443758898908401e-06, "loss": 0.3431, "step": 2800 }, { "epoch": 0.79, "learning_rate": 7.4342667299477935e-06, "loss": 0.3763, "step": 2810 }, { "epoch": 0.8, "learning_rate": 7.424774560987186e-06, "loss": 0.4108, "step": 2820 }, { "epoch": 0.8, "learning_rate": 7.415282392026578e-06, "loss": 0.2789, "step": 2830 }, { "epoch": 0.8, "learning_rate": 7.405790223065972e-06, "loss": 0.2425, "step": 2840 }, { "epoch": 0.8, "learning_rate": 7.396298054105364e-06, "loss": 0.3791, "step": 2850 }, { "epoch": 0.81, "learning_rate": 7.3868058851447565e-06, "loss": 0.5064, "step": 2860 }, { "epoch": 0.81, "learning_rate": 7.377313716184149e-06, "loss": 0.3999, "step": 2870 }, { "epoch": 0.81, "learning_rate": 7.367821547223542e-06, "loss": 0.4088, "step": 2880 }, { "epoch": 0.81, "learning_rate": 7.358329378262934e-06, "loss": 0.3158, "step": 2890 }, { "epoch": 0.82, "learning_rate": 7.348837209302326e-06, "loss": 0.3839, "step": 2900 }, { "epoch": 0.82, "learning_rate": 7.339345040341719e-06, "loss": 0.7998, "step": 2910 }, { "epoch": 0.82, "learning_rate": 7.329852871381111e-06, "loss": 0.4684, "step": 2920 }, { "epoch": 0.83, "learning_rate": 7.320360702420504e-06, "loss": 0.3617, "step": 2930 }, { "epoch": 0.83, "learning_rate": 7.310868533459896e-06, "loss": 0.3617, "step": 2940 }, { "epoch": 0.83, "learning_rate": 7.301376364499288e-06, "loss": 0.4053, "step": 2950 }, { "epoch": 0.83, "learning_rate": 7.291884195538681e-06, "loss": 0.4896, "step": 2960 }, { "epoch": 0.84, "learning_rate": 7.282392026578073e-06, "loss": 0.385, "step": 2970 }, { "epoch": 0.84, "learning_rate": 7.272899857617466e-06, "loss": 0.376, "step": 2980 }, { "epoch": 0.84, "learning_rate": 7.2634076886568585e-06, "loss": 0.49, "step": 2990 }, { "epoch": 0.85, "learning_rate": 7.253915519696251e-06, "loss": 0.3913, "step": 3000 }, { "epoch": 0.85, "eval_loss": 0.37262919545173645, "eval_runtime": 210.8205, "eval_samples_per_second": 9.586, "eval_steps_per_second": 2.4, "step": 3000 }, { "epoch": 0.85, "learning_rate": 7.244423350735643e-06, "loss": 0.5203, "step": 3010 }, { "epoch": 0.85, "learning_rate": 7.234931181775036e-06, "loss": 0.2866, "step": 3020 }, { "epoch": 0.85, "learning_rate": 7.225439012814429e-06, "loss": 0.322, "step": 3030 }, { "epoch": 0.86, "learning_rate": 7.2159468438538215e-06, "loss": 0.4069, "step": 3040 }, { "epoch": 0.86, "learning_rate": 7.206454674893214e-06, "loss": 0.4716, "step": 3050 }, { "epoch": 0.86, "learning_rate": 7.196962505932607e-06, "loss": 0.6109, "step": 3060 }, { "epoch": 0.87, "learning_rate": 7.187470336971999e-06, "loss": 0.5786, "step": 3070 }, { "epoch": 0.87, "learning_rate": 7.177978168011391e-06, "loss": 0.5905, "step": 3080 }, { "epoch": 0.87, "learning_rate": 7.168485999050784e-06, "loss": 0.425, "step": 3090 }, { "epoch": 0.87, "learning_rate": 7.158993830090176e-06, "loss": 0.3144, "step": 3100 }, { "epoch": 0.88, "learning_rate": 7.149501661129569e-06, "loss": 0.4081, "step": 3110 }, { "epoch": 0.88, "learning_rate": 7.1400094921689614e-06, "loss": 0.3787, "step": 3120 }, { "epoch": 0.88, "learning_rate": 7.130517323208353e-06, "loss": 0.2301, "step": 3130 }, { "epoch": 0.89, "learning_rate": 7.121025154247746e-06, "loss": 0.318, "step": 3140 }, { "epoch": 0.89, "learning_rate": 7.111532985287138e-06, "loss": 0.4586, "step": 3150 }, { "epoch": 0.89, "learning_rate": 7.102040816326531e-06, "loss": 0.4389, "step": 3160 }, { "epoch": 0.89, "learning_rate": 7.0925486473659236e-06, "loss": 0.473, "step": 3170 }, { "epoch": 0.9, "learning_rate": 7.083056478405316e-06, "loss": 0.1068, "step": 3180 }, { "epoch": 0.9, "learning_rate": 7.073564309444708e-06, "loss": 0.243, "step": 3190 }, { "epoch": 0.9, "learning_rate": 7.064072140484101e-06, "loss": 0.3434, "step": 3200 }, { "epoch": 0.9, "learning_rate": 7.054579971523494e-06, "loss": 0.5638, "step": 3210 }, { "epoch": 0.91, "learning_rate": 7.0450878025628865e-06, "loss": 0.448, "step": 3220 }, { "epoch": 0.91, "learning_rate": 7.035595633602279e-06, "loss": 0.6347, "step": 3230 }, { "epoch": 0.91, "learning_rate": 7.026103464641672e-06, "loss": 0.7818, "step": 3240 }, { "epoch": 0.92, "learning_rate": 7.016611295681064e-06, "loss": 0.4482, "step": 3250 }, { "epoch": 0.92, "learning_rate": 7.007119126720456e-06, "loss": 0.6219, "step": 3260 }, { "epoch": 0.92, "learning_rate": 6.997626957759849e-06, "loss": 0.2313, "step": 3270 }, { "epoch": 0.92, "learning_rate": 6.988134788799241e-06, "loss": 0.1876, "step": 3280 }, { "epoch": 0.93, "learning_rate": 6.978642619838634e-06, "loss": 0.4771, "step": 3290 }, { "epoch": 0.93, "learning_rate": 6.9691504508780265e-06, "loss": 0.4634, "step": 3300 }, { "epoch": 0.93, "learning_rate": 6.959658281917418e-06, "loss": 0.489, "step": 3310 }, { "epoch": 0.94, "learning_rate": 6.950166112956811e-06, "loss": 0.3812, "step": 3320 }, { "epoch": 0.94, "learning_rate": 6.940673943996203e-06, "loss": 0.4829, "step": 3330 }, { "epoch": 0.94, "learning_rate": 6.931181775035596e-06, "loss": 0.495, "step": 3340 }, { "epoch": 0.94, "learning_rate": 6.921689606074989e-06, "loss": 0.4593, "step": 3350 }, { "epoch": 0.95, "learning_rate": 6.91219743711438e-06, "loss": 0.3818, "step": 3360 }, { "epoch": 0.95, "learning_rate": 6.902705268153773e-06, "loss": 0.3838, "step": 3370 }, { "epoch": 0.95, "learning_rate": 6.893213099193166e-06, "loss": 0.2638, "step": 3380 }, { "epoch": 0.96, "learning_rate": 6.883720930232559e-06, "loss": 0.3994, "step": 3390 }, { "epoch": 0.96, "learning_rate": 6.874228761271952e-06, "loss": 0.2879, "step": 3400 }, { "epoch": 0.96, "learning_rate": 6.864736592311344e-06, "loss": 0.5988, "step": 3410 }, { "epoch": 0.96, "learning_rate": 6.855244423350737e-06, "loss": 0.4441, "step": 3420 }, { "epoch": 0.97, "learning_rate": 6.8457522543901285e-06, "loss": 0.4185, "step": 3430 }, { "epoch": 0.97, "learning_rate": 6.836260085429521e-06, "loss": 0.4659, "step": 3440 }, { "epoch": 0.97, "learning_rate": 6.826767916468914e-06, "loss": 0.4107, "step": 3450 }, { "epoch": 0.98, "learning_rate": 6.817275747508306e-06, "loss": 0.2984, "step": 3460 }, { "epoch": 0.98, "learning_rate": 6.807783578547699e-06, "loss": 0.5467, "step": 3470 }, { "epoch": 0.98, "learning_rate": 6.7982914095870915e-06, "loss": 0.5183, "step": 3480 }, { "epoch": 0.98, "learning_rate": 6.788799240626483e-06, "loss": 0.1466, "step": 3490 }, { "epoch": 0.99, "learning_rate": 6.779307071665876e-06, "loss": 0.2805, "step": 3500 }, { "epoch": 0.99, "eval_loss": 0.36545732617378235, "eval_runtime": 211.5781, "eval_samples_per_second": 9.552, "eval_steps_per_second": 2.392, "step": 3500 }, { "epoch": 0.99, "learning_rate": 6.7698149027052685e-06, "loss": 0.7174, "step": 3510 }, { "epoch": 0.99, "learning_rate": 6.760322733744661e-06, "loss": 0.6719, "step": 3520 }, { "epoch": 1.0, "learning_rate": 6.750830564784054e-06, "loss": 0.4309, "step": 3530 }, { "epoch": 1.0, "learning_rate": 6.741338395823445e-06, "loss": 0.2661, "step": 3540 }, { "epoch": 1.0, "learning_rate": 6.731846226862838e-06, "loss": 0.5369, "step": 3550 }, { "epoch": 1.0, "learning_rate": 6.7223540579022314e-06, "loss": 0.3933, "step": 3560 }, { "epoch": 1.01, "learning_rate": 6.712861888941624e-06, "loss": 0.456, "step": 3570 }, { "epoch": 1.01, "learning_rate": 6.703369719981017e-06, "loss": 0.7618, "step": 3580 }, { "epoch": 1.01, "learning_rate": 6.693877551020409e-06, "loss": 0.5126, "step": 3590 }, { "epoch": 1.01, "learning_rate": 6.684385382059802e-06, "loss": 0.2889, "step": 3600 }, { "epoch": 1.02, "learning_rate": 6.6748932130991936e-06, "loss": 0.6109, "step": 3610 }, { "epoch": 1.02, "learning_rate": 6.665401044138586e-06, "loss": 0.3158, "step": 3620 }, { "epoch": 1.02, "learning_rate": 6.655908875177979e-06, "loss": 0.4721, "step": 3630 }, { "epoch": 1.03, "learning_rate": 6.646416706217371e-06, "loss": 0.4659, "step": 3640 }, { "epoch": 1.03, "learning_rate": 6.636924537256764e-06, "loss": 0.4783, "step": 3650 }, { "epoch": 1.03, "learning_rate": 6.627432368296156e-06, "loss": 0.3333, "step": 3660 }, { "epoch": 1.03, "learning_rate": 6.617940199335548e-06, "loss": 0.3447, "step": 3670 }, { "epoch": 1.04, "learning_rate": 6.608448030374941e-06, "loss": 0.5593, "step": 3680 }, { "epoch": 1.04, "learning_rate": 6.5989558614143335e-06, "loss": 0.3474, "step": 3690 }, { "epoch": 1.04, "learning_rate": 6.589463692453726e-06, "loss": 0.5889, "step": 3700 }, { "epoch": 1.05, "learning_rate": 6.579971523493119e-06, "loss": 0.1262, "step": 3710 }, { "epoch": 1.05, "learning_rate": 6.5704793545325104e-06, "loss": 0.5701, "step": 3720 }, { "epoch": 1.05, "learning_rate": 6.560987185571903e-06, "loss": 0.6356, "step": 3730 }, { "epoch": 1.05, "learning_rate": 6.5514950166112965e-06, "loss": 0.5827, "step": 3740 }, { "epoch": 1.06, "learning_rate": 6.542002847650689e-06, "loss": 0.4105, "step": 3750 }, { "epoch": 1.06, "learning_rate": 6.532510678690082e-06, "loss": 0.4191, "step": 3760 }, { "epoch": 1.06, "learning_rate": 6.523018509729474e-06, "loss": 0.3027, "step": 3770 }, { "epoch": 1.07, "learning_rate": 6.513526340768867e-06, "loss": 0.395, "step": 3780 }, { "epoch": 1.07, "learning_rate": 6.504034171808259e-06, "loss": 0.2587, "step": 3790 }, { "epoch": 1.07, "learning_rate": 6.494542002847651e-06, "loss": 0.2284, "step": 3800 }, { "epoch": 1.07, "learning_rate": 6.485049833887044e-06, "loss": 0.4834, "step": 3810 }, { "epoch": 1.08, "learning_rate": 6.475557664926436e-06, "loss": 0.1843, "step": 3820 }, { "epoch": 1.08, "learning_rate": 6.466065495965829e-06, "loss": 0.5196, "step": 3830 }, { "epoch": 1.08, "learning_rate": 6.456573327005221e-06, "loss": 0.4602, "step": 3840 }, { "epoch": 1.09, "learning_rate": 6.447081158044613e-06, "loss": 0.1406, "step": 3850 }, { "epoch": 1.09, "learning_rate": 6.437588989084006e-06, "loss": 0.3493, "step": 3860 }, { "epoch": 1.09, "learning_rate": 6.4280968201233985e-06, "loss": 0.4312, "step": 3870 }, { "epoch": 1.09, "learning_rate": 6.418604651162791e-06, "loss": 0.5843, "step": 3880 }, { "epoch": 1.1, "learning_rate": 6.409112482202183e-06, "loss": 0.4368, "step": 3890 }, { "epoch": 1.1, "learning_rate": 6.3996203132415755e-06, "loss": 0.4638, "step": 3900 }, { "epoch": 1.1, "learning_rate": 6.390128144280968e-06, "loss": 0.5721, "step": 3910 }, { "epoch": 1.11, "learning_rate": 6.3806359753203615e-06, "loss": 0.2774, "step": 3920 }, { "epoch": 1.11, "learning_rate": 6.371143806359754e-06, "loss": 0.641, "step": 3930 }, { "epoch": 1.11, "learning_rate": 6.361651637399147e-06, "loss": 0.3003, "step": 3940 }, { "epoch": 1.11, "learning_rate": 6.352159468438539e-06, "loss": 0.5912, "step": 3950 }, { "epoch": 1.12, "learning_rate": 6.342667299477931e-06, "loss": 0.5673, "step": 3960 }, { "epoch": 1.12, "learning_rate": 6.333175130517324e-06, "loss": 0.3721, "step": 3970 }, { "epoch": 1.12, "learning_rate": 6.323682961556716e-06, "loss": 0.5748, "step": 3980 }, { "epoch": 1.12, "learning_rate": 6.314190792596109e-06, "loss": 0.384, "step": 3990 }, { "epoch": 1.13, "learning_rate": 6.3046986236355014e-06, "loss": 0.6733, "step": 4000 }, { "epoch": 1.13, "eval_loss": 0.3598354458808899, "eval_runtime": 209.9575, "eval_samples_per_second": 9.626, "eval_steps_per_second": 2.41, "step": 4000 }, { "epoch": 1.13, "learning_rate": 6.295206454674894e-06, "loss": 0.29, "step": 4010 }, { "epoch": 1.13, "learning_rate": 6.285714285714286e-06, "loss": 0.2849, "step": 4020 }, { "epoch": 1.14, "learning_rate": 6.276222116753678e-06, "loss": 0.5834, "step": 4030 }, { "epoch": 1.14, "learning_rate": 6.266729947793071e-06, "loss": 0.1479, "step": 4040 }, { "epoch": 1.14, "learning_rate": 6.257237778832464e-06, "loss": 0.5803, "step": 4050 }, { "epoch": 1.14, "learning_rate": 6.247745609871856e-06, "loss": 0.2966, "step": 4060 }, { "epoch": 1.15, "learning_rate": 6.238253440911248e-06, "loss": 0.3872, "step": 4070 }, { "epoch": 1.15, "learning_rate": 6.2287612719506405e-06, "loss": 0.4973, "step": 4080 }, { "epoch": 1.15, "learning_rate": 6.219269102990034e-06, "loss": 0.6415, "step": 4090 }, { "epoch": 1.16, "learning_rate": 6.2097769340294266e-06, "loss": 0.4729, "step": 4100 }, { "epoch": 1.16, "learning_rate": 6.200284765068819e-06, "loss": 0.5401, "step": 4110 }, { "epoch": 1.16, "learning_rate": 6.190792596108212e-06, "loss": 0.2515, "step": 4120 }, { "epoch": 1.16, "learning_rate": 6.181300427147604e-06, "loss": 0.3167, "step": 4130 }, { "epoch": 1.17, "learning_rate": 6.171808258186996e-06, "loss": 0.6597, "step": 4140 }, { "epoch": 1.17, "learning_rate": 6.162316089226389e-06, "loss": 0.4192, "step": 4150 }, { "epoch": 1.17, "learning_rate": 6.152823920265781e-06, "loss": 0.2399, "step": 4160 }, { "epoch": 1.18, "learning_rate": 6.143331751305174e-06, "loss": 0.3981, "step": 4170 }, { "epoch": 1.18, "learning_rate": 6.1338395823445665e-06, "loss": 0.3968, "step": 4180 }, { "epoch": 1.18, "learning_rate": 6.124347413383958e-06, "loss": 0.3704, "step": 4190 }, { "epoch": 1.18, "learning_rate": 6.114855244423351e-06, "loss": 0.4162, "step": 4200 }, { "epoch": 1.19, "learning_rate": 6.1053630754627434e-06, "loss": 0.3179, "step": 4210 }, { "epoch": 1.19, "learning_rate": 6.095870906502136e-06, "loss": 0.4292, "step": 4220 }, { "epoch": 1.19, "learning_rate": 6.086378737541529e-06, "loss": 0.4461, "step": 4230 }, { "epoch": 1.2, "learning_rate": 6.076886568580921e-06, "loss": 0.4048, "step": 4240 }, { "epoch": 1.2, "learning_rate": 6.067394399620313e-06, "loss": 0.4935, "step": 4250 }, { "epoch": 1.2, "learning_rate": 6.0579022306597056e-06, "loss": 0.2367, "step": 4260 }, { "epoch": 1.2, "learning_rate": 6.048410061699099e-06, "loss": 0.354, "step": 4270 }, { "epoch": 1.21, "learning_rate": 6.038917892738492e-06, "loss": 0.705, "step": 4280 }, { "epoch": 1.21, "learning_rate": 6.029425723777884e-06, "loss": 0.5404, "step": 4290 }, { "epoch": 1.21, "learning_rate": 6.019933554817277e-06, "loss": 0.4148, "step": 4300 }, { "epoch": 1.22, "learning_rate": 6.010441385856669e-06, "loss": 0.2862, "step": 4310 }, { "epoch": 1.22, "learning_rate": 6.000949216896061e-06, "loss": 0.5574, "step": 4320 }, { "epoch": 1.22, "learning_rate": 5.991457047935454e-06, "loss": 0.3857, "step": 4330 }, { "epoch": 1.22, "learning_rate": 5.981964878974846e-06, "loss": 0.4672, "step": 4340 }, { "epoch": 1.23, "learning_rate": 5.972472710014239e-06, "loss": 0.5925, "step": 4350 }, { "epoch": 1.23, "learning_rate": 5.9629805410536315e-06, "loss": 0.4423, "step": 4360 }, { "epoch": 1.23, "learning_rate": 5.953488372093023e-06, "loss": 0.4828, "step": 4370 }, { "epoch": 1.23, "learning_rate": 5.943996203132416e-06, "loss": 0.5208, "step": 4380 }, { "epoch": 1.24, "learning_rate": 5.9345040341718085e-06, "loss": 0.3895, "step": 4390 }, { "epoch": 1.24, "learning_rate": 5.925011865211201e-06, "loss": 0.4667, "step": 4400 }, { "epoch": 1.24, "learning_rate": 5.915519696250594e-06, "loss": 0.5809, "step": 4410 }, { "epoch": 1.25, "learning_rate": 5.906027527289986e-06, "loss": 0.4157, "step": 4420 }, { "epoch": 1.25, "learning_rate": 5.896535358329378e-06, "loss": 0.2807, "step": 4430 }, { "epoch": 1.25, "learning_rate": 5.887043189368771e-06, "loss": 0.5036, "step": 4440 }, { "epoch": 1.25, "learning_rate": 5.877551020408164e-06, "loss": 0.3498, "step": 4450 }, { "epoch": 1.26, "learning_rate": 5.868058851447557e-06, "loss": 0.46, "step": 4460 }, { "epoch": 1.26, "learning_rate": 5.858566682486949e-06, "loss": 0.2416, "step": 4470 }, { "epoch": 1.26, "learning_rate": 5.849074513526342e-06, "loss": 0.3104, "step": 4480 }, { "epoch": 1.27, "learning_rate": 5.8395823445657344e-06, "loss": 0.6617, "step": 4490 }, { "epoch": 1.27, "learning_rate": 5.830090175605126e-06, "loss": 0.2603, "step": 4500 }, { "epoch": 1.27, "eval_loss": 0.3565267324447632, "eval_runtime": 210.4541, "eval_samples_per_second": 9.603, "eval_steps_per_second": 2.404, "step": 4500 }, { "epoch": 1.27, "learning_rate": 5.820598006644519e-06, "loss": 0.1701, "step": 4510 }, { "epoch": 1.27, "learning_rate": 5.811105837683911e-06, "loss": 0.3745, "step": 4520 }, { "epoch": 1.28, "learning_rate": 5.801613668723304e-06, "loss": 0.296, "step": 4530 }, { "epoch": 1.28, "learning_rate": 5.7921214997626966e-06, "loss": 0.1971, "step": 4540 }, { "epoch": 1.28, "learning_rate": 5.782629330802088e-06, "loss": 0.3824, "step": 4550 }, { "epoch": 1.29, "learning_rate": 5.773137161841481e-06, "loss": 0.4496, "step": 4560 }, { "epoch": 1.29, "learning_rate": 5.7636449928808735e-06, "loss": 0.3678, "step": 4570 }, { "epoch": 1.29, "learning_rate": 5.754152823920266e-06, "loss": 0.368, "step": 4580 }, { "epoch": 1.29, "learning_rate": 5.744660654959659e-06, "loss": 0.4836, "step": 4590 }, { "epoch": 1.3, "learning_rate": 5.7351684859990505e-06, "loss": 0.4723, "step": 4600 }, { "epoch": 1.3, "learning_rate": 5.725676317038443e-06, "loss": 0.2437, "step": 4610 }, { "epoch": 1.3, "learning_rate": 5.716184148077836e-06, "loss": 0.1526, "step": 4620 }, { "epoch": 1.31, "learning_rate": 5.706691979117229e-06, "loss": 0.1451, "step": 4630 }, { "epoch": 1.31, "learning_rate": 5.697199810156622e-06, "loss": 0.4266, "step": 4640 }, { "epoch": 1.31, "learning_rate": 5.687707641196014e-06, "loss": 0.5522, "step": 4650 }, { "epoch": 1.31, "learning_rate": 5.678215472235407e-06, "loss": 0.4187, "step": 4660 }, { "epoch": 1.32, "learning_rate": 5.668723303274799e-06, "loss": 0.3749, "step": 4670 }, { "epoch": 1.32, "learning_rate": 5.659231134314191e-06, "loss": 0.156, "step": 4680 }, { "epoch": 1.32, "learning_rate": 5.649738965353584e-06, "loss": 0.1152, "step": 4690 }, { "epoch": 1.33, "learning_rate": 5.6402467963929764e-06, "loss": 0.4565, "step": 4700 }, { "epoch": 1.33, "learning_rate": 5.630754627432369e-06, "loss": 0.328, "step": 4710 }, { "epoch": 1.33, "learning_rate": 5.621262458471762e-06, "loss": 0.4919, "step": 4720 }, { "epoch": 1.33, "learning_rate": 5.611770289511153e-06, "loss": 0.3646, "step": 4730 }, { "epoch": 1.34, "learning_rate": 5.602278120550546e-06, "loss": 0.3677, "step": 4740 }, { "epoch": 1.34, "learning_rate": 5.5927859515899386e-06, "loss": 0.2724, "step": 4750 }, { "epoch": 1.34, "learning_rate": 5.583293782629331e-06, "loss": 0.3406, "step": 4760 }, { "epoch": 1.34, "learning_rate": 5.573801613668724e-06, "loss": 0.4709, "step": 4770 }, { "epoch": 1.35, "learning_rate": 5.5643094447081155e-06, "loss": 0.3161, "step": 4780 }, { "epoch": 1.35, "learning_rate": 5.554817275747508e-06, "loss": 0.4555, "step": 4790 }, { "epoch": 1.35, "learning_rate": 5.545325106786901e-06, "loss": 0.4641, "step": 4800 }, { "epoch": 1.36, "learning_rate": 5.535832937826294e-06, "loss": 0.4858, "step": 4810 }, { "epoch": 1.36, "learning_rate": 5.526340768865687e-06, "loss": 0.4066, "step": 4820 }, { "epoch": 1.36, "learning_rate": 5.516848599905079e-06, "loss": 0.4246, "step": 4830 }, { "epoch": 1.36, "learning_rate": 5.507356430944472e-06, "loss": 0.3599, "step": 4840 }, { "epoch": 1.37, "learning_rate": 5.497864261983864e-06, "loss": 0.4388, "step": 4850 }, { "epoch": 1.37, "learning_rate": 5.488372093023256e-06, "loss": 0.2378, "step": 4860 }, { "epoch": 1.37, "learning_rate": 5.478879924062649e-06, "loss": 0.512, "step": 4870 }, { "epoch": 1.38, "learning_rate": 5.4693877551020415e-06, "loss": 0.5356, "step": 4880 }, { "epoch": 1.38, "learning_rate": 5.459895586141434e-06, "loss": 0.723, "step": 4890 }, { "epoch": 1.38, "learning_rate": 5.450403417180826e-06, "loss": 0.4442, "step": 4900 }, { "epoch": 1.38, "learning_rate": 5.440911248220218e-06, "loss": 0.5757, "step": 4910 }, { "epoch": 1.39, "learning_rate": 5.431419079259611e-06, "loss": 0.3083, "step": 4920 }, { "epoch": 1.39, "learning_rate": 5.421926910299004e-06, "loss": 0.3306, "step": 4930 }, { "epoch": 1.39, "learning_rate": 5.412434741338396e-06, "loss": 0.3638, "step": 4940 }, { "epoch": 1.4, "learning_rate": 5.402942572377789e-06, "loss": 0.3285, "step": 4950 }, { "epoch": 1.4, "learning_rate": 5.3934504034171805e-06, "loss": 0.5824, "step": 4960 }, { "epoch": 1.4, "learning_rate": 5.383958234456573e-06, "loss": 0.4002, "step": 4970 }, { "epoch": 1.4, "learning_rate": 5.374466065495966e-06, "loss": 0.1342, "step": 4980 }, { "epoch": 1.41, "learning_rate": 5.364973896535359e-06, "loss": 0.3491, "step": 4990 }, { "epoch": 1.41, "learning_rate": 5.355481727574752e-06, "loss": 0.5927, "step": 5000 }, { "epoch": 1.41, "eval_loss": 0.3536190092563629, "eval_runtime": 209.407, "eval_samples_per_second": 9.651, "eval_steps_per_second": 2.416, "step": 5000 }, { "epoch": 1.41, "learning_rate": 5.345989558614144e-06, "loss": 0.1028, "step": 5010 }, { "epoch": 1.42, "learning_rate": 5.336497389653537e-06, "loss": 0.206, "step": 5020 }, { "epoch": 1.42, "learning_rate": 5.327005220692929e-06, "loss": 0.2538, "step": 5030 }, { "epoch": 1.42, "learning_rate": 5.317513051732321e-06, "loss": 0.4764, "step": 5040 }, { "epoch": 1.42, "learning_rate": 5.308020882771714e-06, "loss": 0.4456, "step": 5050 }, { "epoch": 1.43, "learning_rate": 5.2985287138111065e-06, "loss": 0.2988, "step": 5060 }, { "epoch": 1.43, "learning_rate": 5.289036544850499e-06, "loss": 0.4721, "step": 5070 }, { "epoch": 1.43, "learning_rate": 5.279544375889891e-06, "loss": 0.4259, "step": 5080 }, { "epoch": 1.44, "learning_rate": 5.2700522069292835e-06, "loss": 0.1379, "step": 5090 }, { "epoch": 1.44, "learning_rate": 5.260560037968676e-06, "loss": 0.4755, "step": 5100 }, { "epoch": 1.44, "learning_rate": 5.251067869008069e-06, "loss": 0.2663, "step": 5110 }, { "epoch": 1.44, "learning_rate": 5.241575700047461e-06, "loss": 0.3229, "step": 5120 }, { "epoch": 1.45, "learning_rate": 5.232083531086853e-06, "loss": 0.2666, "step": 5130 }, { "epoch": 1.45, "learning_rate": 5.222591362126246e-06, "loss": 0.3572, "step": 5140 }, { "epoch": 1.45, "learning_rate": 5.213099193165638e-06, "loss": 0.4938, "step": 5150 }, { "epoch": 1.45, "learning_rate": 5.203607024205031e-06, "loss": 0.5069, "step": 5160 }, { "epoch": 1.46, "learning_rate": 5.194114855244424e-06, "loss": 0.6373, "step": 5170 }, { "epoch": 1.46, "learning_rate": 5.184622686283817e-06, "loss": 0.4395, "step": 5180 }, { "epoch": 1.46, "learning_rate": 5.175130517323209e-06, "loss": 0.3435, "step": 5190 }, { "epoch": 1.47, "learning_rate": 5.165638348362601e-06, "loss": 0.2505, "step": 5200 }, { "epoch": 1.47, "learning_rate": 5.156146179401994e-06, "loss": 0.4521, "step": 5210 }, { "epoch": 1.47, "learning_rate": 5.146654010441386e-06, "loss": 0.4649, "step": 5220 }, { "epoch": 1.47, "learning_rate": 5.137161841480779e-06, "loss": 0.429, "step": 5230 }, { "epoch": 1.48, "learning_rate": 5.1276696725201716e-06, "loss": 0.4792, "step": 5240 }, { "epoch": 1.48, "learning_rate": 5.118177503559564e-06, "loss": 0.4247, "step": 5250 }, { "epoch": 1.48, "learning_rate": 5.108685334598956e-06, "loss": 0.5332, "step": 5260 }, { "epoch": 1.49, "learning_rate": 5.0991931656383485e-06, "loss": 0.1256, "step": 5270 }, { "epoch": 1.49, "learning_rate": 5.089700996677741e-06, "loss": 0.6229, "step": 5280 }, { "epoch": 1.49, "learning_rate": 5.080208827717134e-06, "loss": 0.5491, "step": 5290 }, { "epoch": 1.49, "learning_rate": 5.070716658756526e-06, "loss": 0.5865, "step": 5300 }, { "epoch": 1.5, "learning_rate": 5.061224489795918e-06, "loss": 0.3055, "step": 5310 }, { "epoch": 1.5, "learning_rate": 5.051732320835311e-06, "loss": 0.2392, "step": 5320 }, { "epoch": 1.5, "learning_rate": 5.042240151874703e-06, "loss": 0.2454, "step": 5330 }, { "epoch": 1.51, "learning_rate": 5.032747982914097e-06, "loss": 0.3592, "step": 5340 }, { "epoch": 1.51, "learning_rate": 5.023255813953489e-06, "loss": 0.388, "step": 5350 }, { "epoch": 1.51, "learning_rate": 5.013763644992882e-06, "loss": 0.1818, "step": 5360 }, { "epoch": 1.51, "learning_rate": 5.0042714760322745e-06, "loss": 0.3422, "step": 5370 }, { "epoch": 1.52, "learning_rate": 4.994779307071666e-06, "loss": 0.4801, "step": 5380 }, { "epoch": 1.52, "learning_rate": 4.985287138111059e-06, "loss": 0.7783, "step": 5390 }, { "epoch": 1.52, "learning_rate": 4.975794969150451e-06, "loss": 0.1979, "step": 5400 }, { "epoch": 1.53, "learning_rate": 4.966302800189844e-06, "loss": 0.1784, "step": 5410 }, { "epoch": 1.53, "learning_rate": 4.956810631229237e-06, "loss": 0.3514, "step": 5420 }, { "epoch": 1.53, "learning_rate": 4.947318462268629e-06, "loss": 0.4134, "step": 5430 }, { "epoch": 1.53, "learning_rate": 4.937826293308021e-06, "loss": 0.3057, "step": 5440 }, { "epoch": 1.54, "learning_rate": 4.9283341243474135e-06, "loss": 0.5433, "step": 5450 }, { "epoch": 1.54, "learning_rate": 4.918841955386806e-06, "loss": 0.6126, "step": 5460 }, { "epoch": 1.54, "learning_rate": 4.909349786426199e-06, "loss": 0.2329, "step": 5470 }, { "epoch": 1.54, "learning_rate": 4.899857617465591e-06, "loss": 0.3048, "step": 5480 }, { "epoch": 1.55, "learning_rate": 4.890365448504984e-06, "loss": 0.6663, "step": 5490 }, { "epoch": 1.55, "learning_rate": 4.8808732795443765e-06, "loss": 0.3444, "step": 5500 }, { "epoch": 1.55, "eval_loss": 0.3520536720752716, "eval_runtime": 210.3626, "eval_samples_per_second": 9.607, "eval_steps_per_second": 2.405, "step": 5500 }, { "epoch": 1.55, "learning_rate": 4.871381110583769e-06, "loss": 0.7078, "step": 5510 }, { "epoch": 1.56, "learning_rate": 4.861888941623161e-06, "loss": 0.3893, "step": 5520 }, { "epoch": 1.56, "learning_rate": 4.8523967726625535e-06, "loss": 0.3299, "step": 5530 }, { "epoch": 1.56, "learning_rate": 4.842904603701946e-06, "loss": 0.2718, "step": 5540 }, { "epoch": 1.56, "learning_rate": 4.833412434741339e-06, "loss": 0.4207, "step": 5550 }, { "epoch": 1.57, "learning_rate": 4.823920265780731e-06, "loss": 0.2569, "step": 5560 }, { "epoch": 1.57, "learning_rate": 4.814428096820124e-06, "loss": 0.3158, "step": 5570 }, { "epoch": 1.57, "learning_rate": 4.8049359278595164e-06, "loss": 0.4117, "step": 5580 }, { "epoch": 1.58, "learning_rate": 4.795443758898909e-06, "loss": 0.4646, "step": 5590 }, { "epoch": 1.58, "learning_rate": 4.785951589938302e-06, "loss": 0.3877, "step": 5600 }, { "epoch": 1.58, "learning_rate": 4.776459420977693e-06, "loss": 0.2987, "step": 5610 }, { "epoch": 1.58, "learning_rate": 4.766967252017086e-06, "loss": 0.3859, "step": 5620 }, { "epoch": 1.59, "learning_rate": 4.757475083056479e-06, "loss": 0.138, "step": 5630 }, { "epoch": 1.59, "learning_rate": 4.747982914095871e-06, "loss": 0.2836, "step": 5640 }, { "epoch": 1.59, "learning_rate": 4.738490745135264e-06, "loss": 0.4993, "step": 5650 }, { "epoch": 1.6, "learning_rate": 4.728998576174656e-06, "loss": 0.4256, "step": 5660 }, { "epoch": 1.6, "learning_rate": 4.719506407214049e-06, "loss": 0.5983, "step": 5670 }, { "epoch": 1.6, "learning_rate": 4.7100142382534416e-06, "loss": 0.4265, "step": 5680 }, { "epoch": 1.6, "learning_rate": 4.700522069292834e-06, "loss": 0.2854, "step": 5690 }, { "epoch": 1.61, "learning_rate": 4.691029900332226e-06, "loss": 0.3984, "step": 5700 }, { "epoch": 1.61, "learning_rate": 4.6815377313716185e-06, "loss": 0.34, "step": 5710 }, { "epoch": 1.61, "learning_rate": 4.672045562411011e-06, "loss": 0.4355, "step": 5720 }, { "epoch": 1.62, "learning_rate": 4.662553393450404e-06, "loss": 0.4004, "step": 5730 }, { "epoch": 1.62, "learning_rate": 4.653061224489796e-06, "loss": 0.3462, "step": 5740 }, { "epoch": 1.62, "learning_rate": 4.643569055529189e-06, "loss": 0.3231, "step": 5750 }, { "epoch": 1.62, "learning_rate": 4.6340768865685815e-06, "loss": 0.6457, "step": 5760 }, { "epoch": 1.63, "learning_rate": 4.624584717607974e-06, "loss": 0.4086, "step": 5770 }, { "epoch": 1.63, "learning_rate": 4.615092548647367e-06, "loss": 0.2528, "step": 5780 }, { "epoch": 1.63, "learning_rate": 4.6056003796867584e-06, "loss": 0.5488, "step": 5790 }, { "epoch": 1.64, "learning_rate": 4.596108210726151e-06, "loss": 0.381, "step": 5800 }, { "epoch": 1.64, "learning_rate": 4.586616041765544e-06, "loss": 0.5675, "step": 5810 }, { "epoch": 1.64, "learning_rate": 4.577123872804936e-06, "loss": 0.5866, "step": 5820 }, { "epoch": 1.64, "learning_rate": 4.567631703844329e-06, "loss": 0.4035, "step": 5830 }, { "epoch": 1.65, "learning_rate": 4.558139534883721e-06, "loss": 0.2322, "step": 5840 }, { "epoch": 1.65, "learning_rate": 4.548647365923114e-06, "loss": 0.717, "step": 5850 }, { "epoch": 1.65, "learning_rate": 4.539155196962507e-06, "loss": 0.5347, "step": 5860 }, { "epoch": 1.65, "learning_rate": 4.529663028001899e-06, "loss": 0.2423, "step": 5870 }, { "epoch": 1.66, "learning_rate": 4.520170859041291e-06, "loss": 0.439, "step": 5880 }, { "epoch": 1.66, "learning_rate": 4.5106786900806835e-06, "loss": 0.4595, "step": 5890 }, { "epoch": 1.66, "learning_rate": 4.501186521120076e-06, "loss": 0.5906, "step": 5900 }, { "epoch": 1.67, "learning_rate": 4.491694352159469e-06, "loss": 0.5294, "step": 5910 }, { "epoch": 1.67, "learning_rate": 4.482202183198861e-06, "loss": 0.2951, "step": 5920 }, { "epoch": 1.67, "learning_rate": 4.472710014238254e-06, "loss": 0.6254, "step": 5930 }, { "epoch": 1.67, "learning_rate": 4.4632178452776465e-06, "loss": 0.5945, "step": 5940 }, { "epoch": 1.68, "learning_rate": 4.453725676317039e-06, "loss": 0.4814, "step": 5950 }, { "epoch": 1.68, "learning_rate": 4.444233507356432e-06, "loss": 0.4048, "step": 5960 }, { "epoch": 1.68, "learning_rate": 4.4347413383958235e-06, "loss": 0.1721, "step": 5970 }, { "epoch": 1.69, "learning_rate": 4.425249169435216e-06, "loss": 0.2362, "step": 5980 }, { "epoch": 1.69, "learning_rate": 4.415757000474609e-06, "loss": 0.2302, "step": 5990 }, { "epoch": 1.69, "learning_rate": 4.406264831514001e-06, "loss": 0.4041, "step": 6000 }, { "epoch": 1.69, "eval_loss": 0.34984728693962097, "eval_runtime": 212.0644, "eval_samples_per_second": 9.53, "eval_steps_per_second": 2.386, "step": 6000 }, { "epoch": 1.69, "learning_rate": 4.396772662553394e-06, "loss": 0.8092, "step": 6010 }, { "epoch": 1.7, "learning_rate": 4.3872804935927865e-06, "loss": 0.327, "step": 6020 }, { "epoch": 1.7, "learning_rate": 4.377788324632179e-06, "loss": 0.3231, "step": 6030 }, { "epoch": 1.7, "learning_rate": 4.368296155671572e-06, "loss": 0.4055, "step": 6040 }, { "epoch": 1.71, "learning_rate": 4.358803986710964e-06, "loss": 0.2245, "step": 6050 }, { "epoch": 1.71, "learning_rate": 4.349311817750356e-06, "loss": 0.4406, "step": 6060 }, { "epoch": 1.71, "learning_rate": 4.339819648789749e-06, "loss": 0.2078, "step": 6070 }, { "epoch": 1.71, "learning_rate": 4.330327479829141e-06, "loss": 0.5263, "step": 6080 }, { "epoch": 1.72, "learning_rate": 4.320835310868534e-06, "loss": 0.6044, "step": 6090 }, { "epoch": 1.72, "learning_rate": 4.311343141907926e-06, "loss": 0.3704, "step": 6100 }, { "epoch": 1.72, "learning_rate": 4.301850972947319e-06, "loss": 0.4697, "step": 6110 }, { "epoch": 1.73, "learning_rate": 4.2923588039867116e-06, "loss": 0.3077, "step": 6120 }, { "epoch": 1.73, "learning_rate": 4.282866635026104e-06, "loss": 0.4685, "step": 6130 }, { "epoch": 1.73, "learning_rate": 4.273374466065496e-06, "loss": 0.4972, "step": 6140 }, { "epoch": 1.73, "learning_rate": 4.2638822971048885e-06, "loss": 0.4597, "step": 6150 }, { "epoch": 1.74, "learning_rate": 4.254390128144281e-06, "loss": 0.2447, "step": 6160 }, { "epoch": 1.74, "learning_rate": 4.244897959183674e-06, "loss": 0.5126, "step": 6170 }, { "epoch": 1.74, "learning_rate": 4.235405790223066e-06, "loss": 0.3077, "step": 6180 }, { "epoch": 1.75, "learning_rate": 4.225913621262459e-06, "loss": 0.3864, "step": 6190 }, { "epoch": 1.75, "learning_rate": 4.2164214523018515e-06, "loss": 0.5483, "step": 6200 }, { "epoch": 1.75, "learning_rate": 4.206929283341244e-06, "loss": 0.5613, "step": 6210 }, { "epoch": 1.75, "learning_rate": 4.197437114380637e-06, "loss": 0.3505, "step": 6220 }, { "epoch": 1.76, "learning_rate": 4.1879449454200284e-06, "loss": 0.4028, "step": 6230 }, { "epoch": 1.76, "learning_rate": 4.178452776459421e-06, "loss": 0.3838, "step": 6240 }, { "epoch": 1.76, "learning_rate": 4.168960607498814e-06, "loss": 0.2545, "step": 6250 }, { "epoch": 1.76, "learning_rate": 4.159468438538206e-06, "loss": 0.3897, "step": 6260 }, { "epoch": 1.77, "learning_rate": 4.149976269577599e-06, "loss": 0.1108, "step": 6270 }, { "epoch": 1.77, "learning_rate": 4.1404841006169914e-06, "loss": 0.2364, "step": 6280 }, { "epoch": 1.77, "learning_rate": 4.130991931656384e-06, "loss": 0.4792, "step": 6290 }, { "epoch": 1.78, "learning_rate": 4.121499762695777e-06, "loss": 0.3424, "step": 6300 }, { "epoch": 1.78, "learning_rate": 4.112007593735169e-06, "loss": 0.3274, "step": 6310 }, { "epoch": 1.78, "learning_rate": 4.102515424774561e-06, "loss": 0.2145, "step": 6320 }, { "epoch": 1.78, "learning_rate": 4.0930232558139536e-06, "loss": 0.4916, "step": 6330 }, { "epoch": 1.79, "learning_rate": 4.083531086853346e-06, "loss": 0.4363, "step": 6340 }, { "epoch": 1.79, "learning_rate": 4.074038917892739e-06, "loss": 0.3422, "step": 6350 }, { "epoch": 1.79, "learning_rate": 4.064546748932131e-06, "loss": 0.4339, "step": 6360 }, { "epoch": 1.8, "learning_rate": 4.055054579971524e-06, "loss": 0.5902, "step": 6370 }, { "epoch": 1.8, "learning_rate": 4.0455624110109165e-06, "loss": 0.3237, "step": 6380 }, { "epoch": 1.8, "learning_rate": 4.036070242050309e-06, "loss": 0.4783, "step": 6390 }, { "epoch": 1.8, "learning_rate": 4.026578073089702e-06, "loss": 0.4535, "step": 6400 }, { "epoch": 1.81, "learning_rate": 4.0170859041290935e-06, "loss": 0.5848, "step": 6410 }, { "epoch": 1.81, "learning_rate": 4.007593735168486e-06, "loss": 0.501, "step": 6420 }, { "epoch": 1.81, "learning_rate": 3.998101566207879e-06, "loss": 0.5878, "step": 6430 }, { "epoch": 1.82, "learning_rate": 3.988609397247271e-06, "loss": 0.2525, "step": 6440 }, { "epoch": 1.82, "learning_rate": 3.979117228286664e-06, "loss": 0.1343, "step": 6450 }, { "epoch": 1.82, "learning_rate": 3.9696250593260565e-06, "loss": 0.4367, "step": 6460 }, { "epoch": 1.82, "learning_rate": 3.960132890365449e-06, "loss": 0.4518, "step": 6470 }, { "epoch": 1.83, "learning_rate": 3.950640721404842e-06, "loss": 0.1851, "step": 6480 }, { "epoch": 1.83, "learning_rate": 3.941148552444234e-06, "loss": 0.5702, "step": 6490 }, { "epoch": 1.83, "learning_rate": 3.931656383483626e-06, "loss": 0.5526, "step": 6500 }, { "epoch": 1.83, "eval_loss": 0.34791234135627747, "eval_runtime": 210.9344, "eval_samples_per_second": 9.581, "eval_steps_per_second": 2.399, "step": 6500 }, { "epoch": 1.84, "learning_rate": 3.922164214523019e-06, "loss": 0.2374, "step": 6510 }, { "epoch": 1.84, "learning_rate": 3.912672045562411e-06, "loss": 0.2343, "step": 6520 }, { "epoch": 1.84, "learning_rate": 3.903179876601804e-06, "loss": 0.3023, "step": 6530 }, { "epoch": 1.84, "learning_rate": 3.893687707641196e-06, "loss": 0.2294, "step": 6540 }, { "epoch": 1.85, "learning_rate": 3.884195538680589e-06, "loss": 0.2737, "step": 6550 }, { "epoch": 1.85, "learning_rate": 3.874703369719982e-06, "loss": 0.718, "step": 6560 }, { "epoch": 1.85, "learning_rate": 3.865211200759374e-06, "loss": 0.4965, "step": 6570 }, { "epoch": 1.86, "learning_rate": 3.855719031798767e-06, "loss": 0.5557, "step": 6580 }, { "epoch": 1.86, "learning_rate": 3.8462268628381585e-06, "loss": 0.3303, "step": 6590 }, { "epoch": 1.86, "learning_rate": 3.836734693877551e-06, "loss": 0.3357, "step": 6600 }, { "epoch": 1.86, "learning_rate": 3.827242524916944e-06, "loss": 0.1917, "step": 6610 }, { "epoch": 1.87, "learning_rate": 3.817750355956336e-06, "loss": 0.284, "step": 6620 }, { "epoch": 1.87, "learning_rate": 3.8082581869957285e-06, "loss": 0.4455, "step": 6630 }, { "epoch": 1.87, "learning_rate": 3.7987660180351215e-06, "loss": 0.2505, "step": 6640 }, { "epoch": 1.87, "learning_rate": 3.789273849074514e-06, "loss": 0.3804, "step": 6650 }, { "epoch": 1.88, "learning_rate": 3.7797816801139063e-06, "loss": 0.3043, "step": 6660 }, { "epoch": 1.88, "learning_rate": 3.770289511153299e-06, "loss": 0.1626, "step": 6670 }, { "epoch": 1.88, "learning_rate": 3.7607973421926915e-06, "loss": 0.2546, "step": 6680 }, { "epoch": 1.89, "learning_rate": 3.7513051732320836e-06, "loss": 0.4317, "step": 6690 }, { "epoch": 1.89, "learning_rate": 3.7418130042714762e-06, "loss": 0.3358, "step": 6700 }, { "epoch": 1.89, "learning_rate": 3.732320835310869e-06, "loss": 0.5867, "step": 6710 }, { "epoch": 1.89, "learning_rate": 3.722828666350261e-06, "loss": 0.6311, "step": 6720 }, { "epoch": 1.9, "learning_rate": 3.713336497389654e-06, "loss": 0.3264, "step": 6730 }, { "epoch": 1.9, "learning_rate": 3.7038443284290466e-06, "loss": 0.4179, "step": 6740 }, { "epoch": 1.9, "learning_rate": 3.694352159468439e-06, "loss": 0.372, "step": 6750 }, { "epoch": 1.91, "learning_rate": 3.6848599905078314e-06, "loss": 0.3102, "step": 6760 }, { "epoch": 1.91, "learning_rate": 3.675367821547224e-06, "loss": 0.494, "step": 6770 }, { "epoch": 1.91, "learning_rate": 3.665875652586616e-06, "loss": 0.2307, "step": 6780 }, { "epoch": 1.91, "learning_rate": 3.6563834836260088e-06, "loss": 0.4356, "step": 6790 }, { "epoch": 1.92, "learning_rate": 3.646891314665401e-06, "loss": 0.5974, "step": 6800 }, { "epoch": 1.92, "learning_rate": 3.6373991457047935e-06, "loss": 0.1553, "step": 6810 }, { "epoch": 1.92, "learning_rate": 3.6279069767441866e-06, "loss": 0.429, "step": 6820 }, { "epoch": 1.93, "learning_rate": 3.618414807783579e-06, "loss": 0.2103, "step": 6830 }, { "epoch": 1.93, "learning_rate": 3.6089226388229713e-06, "loss": 0.505, "step": 6840 }, { "epoch": 1.93, "learning_rate": 3.599430469862364e-06, "loss": 0.3072, "step": 6850 }, { "epoch": 1.93, "learning_rate": 3.5899383009017565e-06, "loss": 0.5361, "step": 6860 }, { "epoch": 1.94, "learning_rate": 3.5804461319411487e-06, "loss": 0.6273, "step": 6870 }, { "epoch": 1.94, "learning_rate": 3.5709539629805413e-06, "loss": 0.3035, "step": 6880 }, { "epoch": 1.94, "learning_rate": 3.5614617940199335e-06, "loss": 0.3123, "step": 6890 }, { "epoch": 1.95, "learning_rate": 3.551969625059326e-06, "loss": 0.4275, "step": 6900 }, { "epoch": 1.95, "learning_rate": 3.542477456098719e-06, "loss": 0.4825, "step": 6910 }, { "epoch": 1.95, "learning_rate": 3.5329852871381117e-06, "loss": 0.3058, "step": 6920 }, { "epoch": 1.95, "learning_rate": 3.523493118177504e-06, "loss": 0.4175, "step": 6930 }, { "epoch": 1.96, "learning_rate": 3.5140009492168964e-06, "loss": 0.3237, "step": 6940 }, { "epoch": 1.96, "learning_rate": 3.5045087802562886e-06, "loss": 0.5556, "step": 6950 }, { "epoch": 1.96, "learning_rate": 3.495016611295681e-06, "loss": 0.2021, "step": 6960 }, { "epoch": 1.97, "learning_rate": 3.485524442335074e-06, "loss": 0.4329, "step": 6970 }, { "epoch": 1.97, "learning_rate": 3.476032273374466e-06, "loss": 0.465, "step": 6980 }, { "epoch": 1.97, "learning_rate": 3.4665401044138586e-06, "loss": 0.4829, "step": 6990 }, { "epoch": 1.97, "learning_rate": 3.4570479354532516e-06, "loss": 0.3314, "step": 7000 }, { "epoch": 1.97, "eval_loss": 0.3463754951953888, "eval_runtime": 211.4262, "eval_samples_per_second": 9.559, "eval_steps_per_second": 2.393, "step": 7000 }, { "epoch": 1.98, "learning_rate": 3.447555766492644e-06, "loss": 0.3652, "step": 7010 }, { "epoch": 1.98, "learning_rate": 3.4380635975320364e-06, "loss": 0.3147, "step": 7020 }, { "epoch": 1.98, "learning_rate": 3.428571428571429e-06, "loss": 0.1345, "step": 7030 }, { "epoch": 1.98, "learning_rate": 3.419079259610821e-06, "loss": 0.361, "step": 7040 }, { "epoch": 1.99, "learning_rate": 3.4095870906502137e-06, "loss": 0.5192, "step": 7050 }, { "epoch": 1.99, "learning_rate": 3.4000949216896063e-06, "loss": 0.37, "step": 7060 }, { "epoch": 1.99, "learning_rate": 3.3906027527289985e-06, "loss": 0.4377, "step": 7070 }, { "epoch": 2.0, "learning_rate": 3.3811105837683915e-06, "loss": 0.5498, "step": 7080 }, { "epoch": 2.0, "learning_rate": 3.371618414807784e-06, "loss": 0.2211, "step": 7090 }, { "epoch": 2.0, "learning_rate": 3.3621262458471767e-06, "loss": 0.5511, "step": 7100 }, { "epoch": 2.0, "learning_rate": 3.352634076886569e-06, "loss": 0.4069, "step": 7110 }, { "epoch": 2.01, "learning_rate": 3.3431419079259615e-06, "loss": 0.51, "step": 7120 }, { "epoch": 2.01, "learning_rate": 3.3336497389653537e-06, "loss": 0.3028, "step": 7130 }, { "epoch": 2.01, "learning_rate": 3.3241575700047463e-06, "loss": 0.2983, "step": 7140 }, { "epoch": 2.02, "learning_rate": 3.314665401044139e-06, "loss": 0.7088, "step": 7150 }, { "epoch": 2.02, "learning_rate": 3.305173232083531e-06, "loss": 0.3242, "step": 7160 }, { "epoch": 2.02, "learning_rate": 3.295681063122924e-06, "loss": 0.3656, "step": 7170 }, { "epoch": 2.02, "learning_rate": 3.2861888941623166e-06, "loss": 0.5446, "step": 7180 }, { "epoch": 2.03, "learning_rate": 3.276696725201709e-06, "loss": 0.1479, "step": 7190 }, { "epoch": 2.03, "learning_rate": 3.2672045562411014e-06, "loss": 0.2724, "step": 7200 }, { "epoch": 2.03, "learning_rate": 3.257712387280494e-06, "loss": 0.4428, "step": 7210 }, { "epoch": 2.04, "learning_rate": 3.248220218319886e-06, "loss": 0.374, "step": 7220 }, { "epoch": 2.04, "learning_rate": 3.2387280493592788e-06, "loss": 0.4587, "step": 7230 }, { "epoch": 2.04, "learning_rate": 3.2292358803986714e-06, "loss": 0.2607, "step": 7240 }, { "epoch": 2.04, "learning_rate": 3.2197437114380635e-06, "loss": 0.31, "step": 7250 }, { "epoch": 2.05, "learning_rate": 3.2102515424774566e-06, "loss": 0.5392, "step": 7260 }, { "epoch": 2.05, "learning_rate": 3.200759373516849e-06, "loss": 0.3685, "step": 7270 }, { "epoch": 2.05, "learning_rate": 3.1912672045562413e-06, "loss": 0.6325, "step": 7280 }, { "epoch": 2.06, "learning_rate": 3.181775035595634e-06, "loss": 0.4223, "step": 7290 }, { "epoch": 2.06, "learning_rate": 3.1722828666350265e-06, "loss": 0.3727, "step": 7300 }, { "epoch": 2.06, "learning_rate": 3.1627906976744187e-06, "loss": 0.4869, "step": 7310 }, { "epoch": 2.06, "learning_rate": 3.1532985287138113e-06, "loss": 0.2286, "step": 7320 }, { "epoch": 2.07, "learning_rate": 3.143806359753204e-06, "loss": 0.4144, "step": 7330 }, { "epoch": 2.07, "learning_rate": 3.134314190792596e-06, "loss": 0.452, "step": 7340 }, { "epoch": 2.07, "learning_rate": 3.124822021831989e-06, "loss": 0.3295, "step": 7350 }, { "epoch": 2.07, "learning_rate": 3.1153298528713817e-06, "loss": 0.2194, "step": 7360 }, { "epoch": 2.08, "learning_rate": 3.105837683910774e-06, "loss": 0.2943, "step": 7370 }, { "epoch": 2.08, "learning_rate": 3.0963455149501664e-06, "loss": 0.4255, "step": 7380 }, { "epoch": 2.08, "learning_rate": 3.086853345989559e-06, "loss": 0.2137, "step": 7390 }, { "epoch": 2.09, "learning_rate": 3.0773611770289512e-06, "loss": 0.3923, "step": 7400 }, { "epoch": 2.09, "learning_rate": 3.067869008068344e-06, "loss": 0.3551, "step": 7410 }, { "epoch": 2.09, "learning_rate": 3.058376839107736e-06, "loss": 0.3381, "step": 7420 }, { "epoch": 2.09, "learning_rate": 3.0488846701471286e-06, "loss": 0.2882, "step": 7430 }, { "epoch": 2.1, "learning_rate": 3.0393925011865216e-06, "loss": 0.3432, "step": 7440 }, { "epoch": 2.1, "learning_rate": 3.029900332225914e-06, "loss": 0.5384, "step": 7450 }, { "epoch": 2.1, "learning_rate": 3.0204081632653064e-06, "loss": 0.3176, "step": 7460 }, { "epoch": 2.11, "learning_rate": 3.010915994304699e-06, "loss": 0.6058, "step": 7470 }, { "epoch": 2.11, "learning_rate": 3.0014238253440916e-06, "loss": 0.38, "step": 7480 }, { "epoch": 2.11, "learning_rate": 2.9919316563834837e-06, "loss": 0.3803, "step": 7490 }, { "epoch": 2.11, "learning_rate": 2.9824394874228763e-06, "loss": 0.4602, "step": 7500 }, { "epoch": 2.11, "eval_loss": 0.3481377065181732, "eval_runtime": 211.3751, "eval_samples_per_second": 9.561, "eval_steps_per_second": 2.394, "step": 7500 }, { "epoch": 2.12, "learning_rate": 2.9729473184622685e-06, "loss": 0.4522, "step": 7510 }, { "epoch": 2.12, "learning_rate": 2.963455149501661e-06, "loss": 0.227, "step": 7520 }, { "epoch": 2.12, "learning_rate": 2.953962980541054e-06, "loss": 0.4448, "step": 7530 }, { "epoch": 2.13, "learning_rate": 2.9444708115804467e-06, "loss": 0.538, "step": 7540 }, { "epoch": 2.13, "learning_rate": 2.934978642619839e-06, "loss": 0.3633, "step": 7550 }, { "epoch": 2.13, "learning_rate": 2.9254864736592315e-06, "loss": 0.2754, "step": 7560 }, { "epoch": 2.13, "learning_rate": 2.915994304698624e-06, "loss": 0.5962, "step": 7570 }, { "epoch": 2.14, "learning_rate": 2.9065021357380163e-06, "loss": 0.2812, "step": 7580 }, { "epoch": 2.14, "learning_rate": 2.897009966777409e-06, "loss": 0.5014, "step": 7590 }, { "epoch": 2.14, "learning_rate": 2.887517797816801e-06, "loss": 0.271, "step": 7600 }, { "epoch": 2.15, "learning_rate": 2.8780256288561936e-06, "loss": 0.2206, "step": 7610 }, { "epoch": 2.15, "learning_rate": 2.8685334598955866e-06, "loss": 0.3344, "step": 7620 }, { "epoch": 2.15, "learning_rate": 2.8590412909349792e-06, "loss": 0.4583, "step": 7630 }, { "epoch": 2.15, "learning_rate": 2.8495491219743714e-06, "loss": 0.4646, "step": 7640 }, { "epoch": 2.16, "learning_rate": 2.840056953013764e-06, "loss": 0.2786, "step": 7650 }, { "epoch": 2.16, "learning_rate": 2.830564784053156e-06, "loss": 0.2995, "step": 7660 }, { "epoch": 2.16, "learning_rate": 2.8210726150925488e-06, "loss": 0.2673, "step": 7670 }, { "epoch": 2.17, "learning_rate": 2.8115804461319414e-06, "loss": 0.3, "step": 7680 }, { "epoch": 2.17, "learning_rate": 2.8020882771713336e-06, "loss": 0.4938, "step": 7690 }, { "epoch": 2.17, "learning_rate": 2.792596108210726e-06, "loss": 0.5341, "step": 7700 }, { "epoch": 2.17, "learning_rate": 2.783103939250119e-06, "loss": 0.4789, "step": 7710 }, { "epoch": 2.18, "learning_rate": 2.7736117702895118e-06, "loss": 0.203, "step": 7720 }, { "epoch": 2.18, "learning_rate": 2.764119601328904e-06, "loss": 0.2085, "step": 7730 }, { "epoch": 2.18, "learning_rate": 2.7546274323682965e-06, "loss": 0.35, "step": 7740 }, { "epoch": 2.18, "learning_rate": 2.7451352634076887e-06, "loss": 0.5455, "step": 7750 }, { "epoch": 2.19, "learning_rate": 2.7356430944470813e-06, "loss": 0.6806, "step": 7760 }, { "epoch": 2.19, "learning_rate": 2.726150925486474e-06, "loss": 0.3908, "step": 7770 }, { "epoch": 2.19, "learning_rate": 2.716658756525866e-06, "loss": 0.3496, "step": 7780 }, { "epoch": 2.2, "learning_rate": 2.7071665875652587e-06, "loss": 0.3023, "step": 7790 }, { "epoch": 2.2, "learning_rate": 2.6976744186046517e-06, "loss": 0.2246, "step": 7800 }, { "epoch": 2.2, "learning_rate": 2.688182249644044e-06, "loss": 0.325, "step": 7810 }, { "epoch": 2.2, "learning_rate": 2.6786900806834365e-06, "loss": 0.344, "step": 7820 }, { "epoch": 2.21, "learning_rate": 2.669197911722829e-06, "loss": 0.496, "step": 7830 }, { "epoch": 2.21, "learning_rate": 2.6597057427622212e-06, "loss": 0.1879, "step": 7840 }, { "epoch": 2.21, "learning_rate": 2.650213573801614e-06, "loss": 0.3855, "step": 7850 }, { "epoch": 2.22, "learning_rate": 2.6407214048410064e-06, "loss": 0.5485, "step": 7860 }, { "epoch": 2.22, "learning_rate": 2.6312292358803986e-06, "loss": 0.2999, "step": 7870 }, { "epoch": 2.22, "learning_rate": 2.621737066919791e-06, "loss": 0.2614, "step": 7880 }, { "epoch": 2.22, "learning_rate": 2.6122448979591842e-06, "loss": 0.615, "step": 7890 }, { "epoch": 2.23, "learning_rate": 2.6027527289985764e-06, "loss": 0.4784, "step": 7900 }, { "epoch": 2.23, "learning_rate": 2.593260560037969e-06, "loss": 0.3825, "step": 7910 }, { "epoch": 2.23, "learning_rate": 2.5837683910773616e-06, "loss": 0.2182, "step": 7920 }, { "epoch": 2.24, "learning_rate": 2.5742762221167538e-06, "loss": 0.336, "step": 7930 }, { "epoch": 2.24, "learning_rate": 2.5647840531561463e-06, "loss": 0.3651, "step": 7940 }, { "epoch": 2.24, "learning_rate": 2.555291884195539e-06, "loss": 0.2512, "step": 7950 }, { "epoch": 2.24, "learning_rate": 2.545799715234931e-06, "loss": 0.4523, "step": 7960 }, { "epoch": 2.25, "learning_rate": 2.5363075462743237e-06, "loss": 0.3506, "step": 7970 }, { "epoch": 2.25, "learning_rate": 2.5268153773137167e-06, "loss": 0.3505, "step": 7980 }, { "epoch": 2.25, "learning_rate": 2.517323208353109e-06, "loss": 0.533, "step": 7990 }, { "epoch": 2.26, "learning_rate": 2.5078310393925015e-06, "loss": 0.4401, "step": 8000 }, { "epoch": 2.26, "eval_loss": 0.34744471311569214, "eval_runtime": 211.4984, "eval_samples_per_second": 9.556, "eval_steps_per_second": 2.392, "step": 8000 }, { "epoch": 2.26, "learning_rate": 2.498338870431894e-06, "loss": 0.3372, "step": 8010 }, { "epoch": 2.26, "learning_rate": 2.4888467014712863e-06, "loss": 0.3914, "step": 8020 }, { "epoch": 2.26, "learning_rate": 2.479354532510679e-06, "loss": 0.4832, "step": 8030 }, { "epoch": 2.27, "learning_rate": 2.4698623635500715e-06, "loss": 0.3643, "step": 8040 }, { "epoch": 2.27, "learning_rate": 2.460370194589464e-06, "loss": 0.3191, "step": 8050 }, { "epoch": 2.27, "learning_rate": 2.4508780256288562e-06, "loss": 0.4392, "step": 8060 }, { "epoch": 2.28, "learning_rate": 2.441385856668249e-06, "loss": 0.2072, "step": 8070 }, { "epoch": 2.28, "learning_rate": 2.4318936877076414e-06, "loss": 0.193, "step": 8080 }, { "epoch": 2.28, "learning_rate": 2.422401518747034e-06, "loss": 0.3453, "step": 8090 }, { "epoch": 2.28, "learning_rate": 2.4129093497864266e-06, "loss": 0.3169, "step": 8100 }, { "epoch": 2.29, "learning_rate": 2.403417180825819e-06, "loss": 0.3616, "step": 8110 }, { "epoch": 2.29, "learning_rate": 2.3939250118652114e-06, "loss": 0.2884, "step": 8120 }, { "epoch": 2.29, "learning_rate": 2.384432842904604e-06, "loss": 0.5743, "step": 8130 }, { "epoch": 2.29, "learning_rate": 2.3749406739439966e-06, "loss": 0.3009, "step": 8140 }, { "epoch": 2.3, "learning_rate": 2.3654485049833888e-06, "loss": 0.4973, "step": 8150 }, { "epoch": 2.3, "learning_rate": 2.3559563360227814e-06, "loss": 0.3357, "step": 8160 }, { "epoch": 2.3, "learning_rate": 2.346464167062174e-06, "loss": 0.3922, "step": 8170 }, { "epoch": 2.31, "learning_rate": 2.3369719981015665e-06, "loss": 0.3381, "step": 8180 }, { "epoch": 2.31, "learning_rate": 2.327479829140959e-06, "loss": 0.2991, "step": 8190 }, { "epoch": 2.31, "learning_rate": 2.3179876601803513e-06, "loss": 0.4372, "step": 8200 }, { "epoch": 2.31, "learning_rate": 2.308495491219744e-06, "loss": 0.2993, "step": 8210 }, { "epoch": 2.32, "learning_rate": 2.2990033222591365e-06, "loss": 0.3867, "step": 8220 }, { "epoch": 2.32, "learning_rate": 2.289511153298529e-06, "loss": 0.4566, "step": 8230 }, { "epoch": 2.32, "learning_rate": 2.2800189843379213e-06, "loss": 0.2101, "step": 8240 }, { "epoch": 2.33, "learning_rate": 2.270526815377314e-06, "loss": 0.2875, "step": 8250 }, { "epoch": 2.33, "learning_rate": 2.2610346464167065e-06, "loss": 0.4097, "step": 8260 }, { "epoch": 2.33, "learning_rate": 2.251542477456099e-06, "loss": 0.4438, "step": 8270 }, { "epoch": 2.33, "learning_rate": 2.2420503084954912e-06, "loss": 0.4007, "step": 8280 }, { "epoch": 2.34, "learning_rate": 2.232558139534884e-06, "loss": 0.3578, "step": 8290 }, { "epoch": 2.34, "learning_rate": 2.2230659705742764e-06, "loss": 0.4336, "step": 8300 }, { "epoch": 2.34, "learning_rate": 2.213573801613669e-06, "loss": 0.6333, "step": 8310 }, { "epoch": 2.35, "learning_rate": 2.2040816326530616e-06, "loss": 0.1238, "step": 8320 }, { "epoch": 2.35, "learning_rate": 2.194589463692454e-06, "loss": 0.5357, "step": 8330 }, { "epoch": 2.35, "learning_rate": 2.1850972947318464e-06, "loss": 0.2623, "step": 8340 }, { "epoch": 2.35, "learning_rate": 2.1756051257712386e-06, "loss": 0.2794, "step": 8350 }, { "epoch": 2.36, "learning_rate": 2.1661129568106316e-06, "loss": 0.489, "step": 8360 }, { "epoch": 2.36, "learning_rate": 2.1566207878500238e-06, "loss": 0.3155, "step": 8370 }, { "epoch": 2.36, "learning_rate": 2.1471286188894164e-06, "loss": 0.2944, "step": 8380 }, { "epoch": 2.37, "learning_rate": 2.137636449928809e-06, "loss": 0.4045, "step": 8390 }, { "epoch": 2.37, "learning_rate": 2.1281442809682016e-06, "loss": 0.3847, "step": 8400 }, { "epoch": 2.37, "learning_rate": 2.118652112007594e-06, "loss": 0.2581, "step": 8410 }, { "epoch": 2.37, "learning_rate": 2.1091599430469863e-06, "loss": 0.1977, "step": 8420 }, { "epoch": 2.38, "learning_rate": 2.099667774086379e-06, "loss": 0.1456, "step": 8430 }, { "epoch": 2.38, "learning_rate": 2.090175605125771e-06, "loss": 0.3576, "step": 8440 }, { "epoch": 2.38, "learning_rate": 2.080683436165164e-06, "loss": 0.1775, "step": 8450 }, { "epoch": 2.39, "learning_rate": 2.0711912672045563e-06, "loss": 0.153, "step": 8460 }, { "epoch": 2.39, "learning_rate": 2.061699098243949e-06, "loss": 0.4822, "step": 8470 }, { "epoch": 2.39, "learning_rate": 2.0522069292833415e-06, "loss": 0.3706, "step": 8480 }, { "epoch": 2.39, "learning_rate": 2.042714760322734e-06, "loss": 0.5487, "step": 8490 }, { "epoch": 2.4, "learning_rate": 2.0332225913621267e-06, "loss": 0.1947, "step": 8500 }, { "epoch": 2.4, "eval_loss": 0.3472154438495636, "eval_runtime": 210.133, "eval_samples_per_second": 9.618, "eval_steps_per_second": 2.408, "step": 8500 }, { "epoch": 2.4, "learning_rate": 2.023730422401519e-06, "loss": 0.3501, "step": 8510 }, { "epoch": 2.4, "learning_rate": 2.0142382534409114e-06, "loss": 0.3572, "step": 8520 }, { "epoch": 2.4, "learning_rate": 2.0047460844803036e-06, "loss": 0.4292, "step": 8530 }, { "epoch": 2.41, "learning_rate": 1.9952539155196966e-06, "loss": 0.202, "step": 8540 }, { "epoch": 2.41, "learning_rate": 1.985761746559089e-06, "loss": 0.5637, "step": 8550 }, { "epoch": 2.41, "learning_rate": 1.9762695775984814e-06, "loss": 0.5057, "step": 8560 }, { "epoch": 2.42, "learning_rate": 1.966777408637874e-06, "loss": 0.2232, "step": 8570 }, { "epoch": 2.42, "learning_rate": 1.9572852396772666e-06, "loss": 0.3422, "step": 8580 }, { "epoch": 2.42, "learning_rate": 1.9477930707166588e-06, "loss": 0.282, "step": 8590 }, { "epoch": 2.42, "learning_rate": 1.9383009017560514e-06, "loss": 0.6693, "step": 8600 }, { "epoch": 2.43, "learning_rate": 1.928808732795444e-06, "loss": 0.3485, "step": 8610 }, { "epoch": 2.43, "learning_rate": 1.9193165638348366e-06, "loss": 0.5767, "step": 8620 }, { "epoch": 2.43, "learning_rate": 1.909824394874229e-06, "loss": 0.3807, "step": 8630 }, { "epoch": 2.44, "learning_rate": 1.9003322259136213e-06, "loss": 0.2077, "step": 8640 }, { "epoch": 2.44, "learning_rate": 1.890840056953014e-06, "loss": 0.2414, "step": 8650 }, { "epoch": 2.44, "learning_rate": 1.8813478879924063e-06, "loss": 0.3187, "step": 8660 }, { "epoch": 2.44, "learning_rate": 1.8718557190317991e-06, "loss": 0.5724, "step": 8670 }, { "epoch": 2.45, "learning_rate": 1.8623635500711915e-06, "loss": 0.55, "step": 8680 }, { "epoch": 2.45, "learning_rate": 1.8528713811105839e-06, "loss": 0.386, "step": 8690 }, { "epoch": 2.45, "learning_rate": 1.8433792121499763e-06, "loss": 0.1704, "step": 8700 }, { "epoch": 2.46, "learning_rate": 1.833887043189369e-06, "loss": 0.5093, "step": 8710 }, { "epoch": 2.46, "learning_rate": 1.8243948742287615e-06, "loss": 0.2857, "step": 8720 }, { "epoch": 2.46, "learning_rate": 1.8149027052681538e-06, "loss": 0.4402, "step": 8730 }, { "epoch": 2.46, "learning_rate": 1.8054105363075464e-06, "loss": 0.3786, "step": 8740 }, { "epoch": 2.47, "learning_rate": 1.7959183673469388e-06, "loss": 0.443, "step": 8750 }, { "epoch": 2.47, "learning_rate": 1.7864261983863314e-06, "loss": 0.1684, "step": 8760 }, { "epoch": 2.47, "learning_rate": 1.776934029425724e-06, "loss": 0.4989, "step": 8770 }, { "epoch": 2.48, "learning_rate": 1.7674418604651164e-06, "loss": 0.3821, "step": 8780 }, { "epoch": 2.48, "learning_rate": 1.7579496915045088e-06, "loss": 0.3299, "step": 8790 }, { "epoch": 2.48, "learning_rate": 1.7484575225439016e-06, "loss": 0.4878, "step": 8800 }, { "epoch": 2.48, "learning_rate": 1.738965353583294e-06, "loss": 0.3214, "step": 8810 }, { "epoch": 2.49, "learning_rate": 1.7294731846226864e-06, "loss": 0.3999, "step": 8820 }, { "epoch": 2.49, "learning_rate": 1.719981015662079e-06, "loss": 0.2993, "step": 8830 }, { "epoch": 2.49, "learning_rate": 1.7104888467014713e-06, "loss": 0.415, "step": 8840 }, { "epoch": 2.5, "learning_rate": 1.700996677740864e-06, "loss": 0.29, "step": 8850 }, { "epoch": 2.5, "learning_rate": 1.6915045087802565e-06, "loss": 0.3722, "step": 8860 }, { "epoch": 2.5, "learning_rate": 1.682012339819649e-06, "loss": 0.3986, "step": 8870 }, { "epoch": 2.5, "learning_rate": 1.6725201708590413e-06, "loss": 0.2658, "step": 8880 }, { "epoch": 2.51, "learning_rate": 1.6630280018984341e-06, "loss": 0.4891, "step": 8890 }, { "epoch": 2.51, "learning_rate": 1.6535358329378265e-06, "loss": 0.3952, "step": 8900 }, { "epoch": 2.51, "learning_rate": 1.6440436639772189e-06, "loss": 0.1337, "step": 8910 }, { "epoch": 2.51, "learning_rate": 1.6345514950166113e-06, "loss": 0.358, "step": 8920 }, { "epoch": 2.52, "learning_rate": 1.6250593260560039e-06, "loss": 0.4691, "step": 8930 }, { "epoch": 2.52, "learning_rate": 1.6155671570953965e-06, "loss": 0.3327, "step": 8940 }, { "epoch": 2.52, "learning_rate": 1.606074988134789e-06, "loss": 0.5424, "step": 8950 }, { "epoch": 2.53, "learning_rate": 1.5965828191741814e-06, "loss": 0.4439, "step": 8960 }, { "epoch": 2.53, "learning_rate": 1.5870906502135738e-06, "loss": 0.7304, "step": 8970 }, { "epoch": 2.53, "learning_rate": 1.5775984812529666e-06, "loss": 0.2145, "step": 8980 }, { "epoch": 2.53, "learning_rate": 1.568106312292359e-06, "loss": 0.33, "step": 8990 }, { "epoch": 2.54, "learning_rate": 1.5586141433317514e-06, "loss": 0.4144, "step": 9000 }, { "epoch": 2.54, "eval_loss": 0.3469783067703247, "eval_runtime": 210.9536, "eval_samples_per_second": 9.58, "eval_steps_per_second": 2.399, "step": 9000 }, { "epoch": 2.54, "learning_rate": 1.5491219743711438e-06, "loss": 0.4365, "step": 9010 }, { "epoch": 2.54, "learning_rate": 1.5396298054105364e-06, "loss": 0.306, "step": 9020 }, { "epoch": 2.55, "learning_rate": 1.530137636449929e-06, "loss": 0.5146, "step": 9030 }, { "epoch": 2.55, "learning_rate": 1.5206454674893214e-06, "loss": 0.2689, "step": 9040 }, { "epoch": 2.55, "learning_rate": 1.511153298528714e-06, "loss": 0.2034, "step": 9050 }, { "epoch": 2.55, "learning_rate": 1.5016611295681064e-06, "loss": 0.4136, "step": 9060 }, { "epoch": 2.56, "learning_rate": 1.492168960607499e-06, "loss": 0.2729, "step": 9070 }, { "epoch": 2.56, "learning_rate": 1.4826767916468915e-06, "loss": 0.6817, "step": 9080 }, { "epoch": 2.56, "learning_rate": 1.473184622686284e-06, "loss": 0.3407, "step": 9090 }, { "epoch": 2.57, "learning_rate": 1.4636924537256763e-06, "loss": 0.4404, "step": 9100 }, { "epoch": 2.57, "learning_rate": 1.4542002847650687e-06, "loss": 0.4365, "step": 9110 }, { "epoch": 2.57, "learning_rate": 1.4447081158044615e-06, "loss": 0.318, "step": 9120 }, { "epoch": 2.57, "learning_rate": 1.435215946843854e-06, "loss": 0.7077, "step": 9130 }, { "epoch": 2.58, "learning_rate": 1.4257237778832465e-06, "loss": 0.4964, "step": 9140 }, { "epoch": 2.58, "learning_rate": 1.4162316089226389e-06, "loss": 0.5746, "step": 9150 }, { "epoch": 2.58, "learning_rate": 1.4067394399620315e-06, "loss": 0.349, "step": 9160 }, { "epoch": 2.59, "learning_rate": 1.397247271001424e-06, "loss": 0.4112, "step": 9170 }, { "epoch": 2.59, "learning_rate": 1.3877551020408165e-06, "loss": 0.3403, "step": 9180 }, { "epoch": 2.59, "learning_rate": 1.3782629330802088e-06, "loss": 0.4245, "step": 9190 }, { "epoch": 2.59, "learning_rate": 1.3687707641196012e-06, "loss": 0.473, "step": 9200 }, { "epoch": 2.6, "learning_rate": 1.359278595158994e-06, "loss": 0.8119, "step": 9210 }, { "epoch": 2.6, "learning_rate": 1.3497864261983864e-06, "loss": 0.435, "step": 9220 }, { "epoch": 2.6, "learning_rate": 1.3402942572377788e-06, "loss": 0.2139, "step": 9230 }, { "epoch": 2.61, "learning_rate": 1.3308020882771714e-06, "loss": 0.4287, "step": 9240 }, { "epoch": 2.61, "learning_rate": 1.321309919316564e-06, "loss": 0.2869, "step": 9250 }, { "epoch": 2.61, "learning_rate": 1.3118177503559566e-06, "loss": 0.1564, "step": 9260 }, { "epoch": 2.61, "learning_rate": 1.302325581395349e-06, "loss": 0.4634, "step": 9270 }, { "epoch": 2.62, "learning_rate": 1.2928334124347414e-06, "loss": 0.1519, "step": 9280 }, { "epoch": 2.62, "learning_rate": 1.2833412434741342e-06, "loss": 0.3904, "step": 9290 }, { "epoch": 2.62, "learning_rate": 1.2738490745135266e-06, "loss": 0.227, "step": 9300 }, { "epoch": 2.62, "learning_rate": 1.264356905552919e-06, "loss": 0.5029, "step": 9310 }, { "epoch": 2.63, "learning_rate": 1.2548647365923113e-06, "loss": 0.4979, "step": 9320 }, { "epoch": 2.63, "learning_rate": 1.245372567631704e-06, "loss": 0.4358, "step": 9330 }, { "epoch": 2.63, "learning_rate": 1.2358803986710965e-06, "loss": 0.304, "step": 9340 }, { "epoch": 2.64, "learning_rate": 1.226388229710489e-06, "loss": 0.6384, "step": 9350 }, { "epoch": 2.64, "learning_rate": 1.2168960607498815e-06, "loss": 0.1619, "step": 9360 }, { "epoch": 2.64, "learning_rate": 1.207403891789274e-06, "loss": 0.2769, "step": 9370 }, { "epoch": 2.64, "learning_rate": 1.1979117228286665e-06, "loss": 0.3026, "step": 9380 }, { "epoch": 2.65, "learning_rate": 1.188419553868059e-06, "loss": 0.3886, "step": 9390 }, { "epoch": 2.65, "learning_rate": 1.1789273849074515e-06, "loss": 0.1441, "step": 9400 }, { "epoch": 2.65, "learning_rate": 1.1694352159468438e-06, "loss": 0.3779, "step": 9410 }, { "epoch": 2.66, "learning_rate": 1.1599430469862364e-06, "loss": 0.3901, "step": 9420 }, { "epoch": 2.66, "learning_rate": 1.1504508780256288e-06, "loss": 0.2091, "step": 9430 }, { "epoch": 2.66, "learning_rate": 1.1409587090650214e-06, "loss": 0.3605, "step": 9440 }, { "epoch": 2.66, "learning_rate": 1.131466540104414e-06, "loss": 0.416, "step": 9450 }, { "epoch": 2.67, "learning_rate": 1.1219743711438064e-06, "loss": 0.2775, "step": 9460 }, { "epoch": 2.67, "learning_rate": 1.112482202183199e-06, "loss": 0.4789, "step": 9470 }, { "epoch": 2.67, "learning_rate": 1.1029900332225916e-06, "loss": 0.4778, "step": 9480 }, { "epoch": 2.68, "learning_rate": 1.093497864261984e-06, "loss": 0.3076, "step": 9490 }, { "epoch": 2.68, "learning_rate": 1.0840056953013764e-06, "loss": 0.3726, "step": 9500 }, { "epoch": 2.68, "eval_loss": 0.34603193402290344, "eval_runtime": 209.9907, "eval_samples_per_second": 9.624, "eval_steps_per_second": 2.41, "step": 9500 }, { "epoch": 2.68, "learning_rate": 1.074513526340769e-06, "loss": 0.4906, "step": 9510 }, { "epoch": 2.68, "learning_rate": 1.0650213573801613e-06, "loss": 0.3415, "step": 9520 }, { "epoch": 2.69, "learning_rate": 1.055529188419554e-06, "loss": 0.2651, "step": 9530 }, { "epoch": 2.69, "learning_rate": 1.0460370194589463e-06, "loss": 0.3119, "step": 9540 }, { "epoch": 2.69, "learning_rate": 1.036544850498339e-06, "loss": 0.3054, "step": 9550 }, { "epoch": 2.7, "learning_rate": 1.0270526815377315e-06, "loss": 0.3226, "step": 9560 }, { "epoch": 2.7, "learning_rate": 1.0175605125771241e-06, "loss": 0.5005, "step": 9570 }, { "epoch": 2.7, "learning_rate": 1.0080683436165165e-06, "loss": 0.2753, "step": 9580 }, { "epoch": 2.7, "learning_rate": 9.985761746559089e-07, "loss": 0.2687, "step": 9590 }, { "epoch": 2.71, "learning_rate": 9.890840056953015e-07, "loss": 0.3084, "step": 9600 }, { "epoch": 2.71, "learning_rate": 9.795918367346939e-07, "loss": 0.3602, "step": 9610 }, { "epoch": 2.71, "learning_rate": 9.700996677740865e-07, "loss": 0.3026, "step": 9620 }, { "epoch": 2.71, "learning_rate": 9.606074988134788e-07, "loss": 0.5057, "step": 9630 }, { "epoch": 2.72, "learning_rate": 9.511153298528716e-07, "loss": 0.3735, "step": 9640 }, { "epoch": 2.72, "learning_rate": 9.416231608922639e-07, "loss": 0.5268, "step": 9650 }, { "epoch": 2.72, "learning_rate": 9.321309919316565e-07, "loss": 0.4173, "step": 9660 }, { "epoch": 2.73, "learning_rate": 9.226388229710489e-07, "loss": 0.3529, "step": 9670 }, { "epoch": 2.73, "learning_rate": 9.131466540104414e-07, "loss": 0.3961, "step": 9680 }, { "epoch": 2.73, "learning_rate": 9.03654485049834e-07, "loss": 0.4331, "step": 9690 }, { "epoch": 2.73, "learning_rate": 8.941623160892264e-07, "loss": 0.3743, "step": 9700 }, { "epoch": 2.74, "learning_rate": 8.84670147128619e-07, "loss": 0.2121, "step": 9710 }, { "epoch": 2.74, "learning_rate": 8.751779781680114e-07, "loss": 0.4233, "step": 9720 }, { "epoch": 2.74, "learning_rate": 8.65685809207404e-07, "loss": 0.3716, "step": 9730 }, { "epoch": 2.75, "learning_rate": 8.561936402467965e-07, "loss": 0.275, "step": 9740 }, { "epoch": 2.75, "learning_rate": 8.467014712861891e-07, "loss": 0.2921, "step": 9750 }, { "epoch": 2.75, "learning_rate": 8.372093023255814e-07, "loss": 0.3295, "step": 9760 }, { "epoch": 2.75, "learning_rate": 8.27717133364974e-07, "loss": 0.4326, "step": 9770 }, { "epoch": 2.76, "learning_rate": 8.182249644043664e-07, "loss": 0.4383, "step": 9780 }, { "epoch": 2.76, "learning_rate": 8.087327954437589e-07, "loss": 0.3254, "step": 9790 }, { "epoch": 2.76, "learning_rate": 7.992406264831515e-07, "loss": 0.3985, "step": 9800 }, { "epoch": 2.77, "learning_rate": 7.897484575225439e-07, "loss": 0.4201, "step": 9810 }, { "epoch": 2.77, "learning_rate": 7.802562885619365e-07, "loss": 0.15, "step": 9820 }, { "epoch": 2.77, "learning_rate": 7.70764119601329e-07, "loss": 0.5237, "step": 9830 }, { "epoch": 2.77, "learning_rate": 7.612719506407215e-07, "loss": 0.3785, "step": 9840 }, { "epoch": 2.78, "learning_rate": 7.51779781680114e-07, "loss": 0.508, "step": 9850 }, { "epoch": 2.78, "learning_rate": 7.422876127195066e-07, "loss": 0.3134, "step": 9860 }, { "epoch": 2.78, "learning_rate": 7.327954437588989e-07, "loss": 0.4777, "step": 9870 }, { "epoch": 2.79, "learning_rate": 7.233032747982914e-07, "loss": 0.2723, "step": 9880 }, { "epoch": 2.79, "learning_rate": 7.13811105837684e-07, "loss": 0.2647, "step": 9890 }, { "epoch": 2.79, "learning_rate": 7.043189368770764e-07, "loss": 0.4874, "step": 9900 }, { "epoch": 2.79, "learning_rate": 6.94826767916469e-07, "loss": 0.0944, "step": 9910 }, { "epoch": 2.8, "learning_rate": 6.853345989558614e-07, "loss": 0.2228, "step": 9920 }, { "epoch": 2.8, "learning_rate": 6.75842429995254e-07, "loss": 0.3151, "step": 9930 }, { "epoch": 2.8, "learning_rate": 6.663502610346465e-07, "loss": 0.3432, "step": 9940 }, { "epoch": 2.81, "learning_rate": 6.568580920740391e-07, "loss": 0.4183, "step": 9950 }, { "epoch": 2.81, "learning_rate": 6.473659231134315e-07, "loss": 0.2398, "step": 9960 }, { "epoch": 2.81, "learning_rate": 6.37873754152824e-07, "loss": 0.2553, "step": 9970 }, { "epoch": 2.81, "learning_rate": 6.283815851922164e-07, "loss": 0.2551, "step": 9980 }, { "epoch": 2.82, "learning_rate": 6.18889416231609e-07, "loss": 0.4634, "step": 9990 }, { "epoch": 2.82, "learning_rate": 6.093972472710015e-07, "loss": 0.418, "step": 10000 }, { "epoch": 2.82, "eval_loss": 0.34635499119758606, "eval_runtime": 210.2528, "eval_samples_per_second": 9.612, "eval_steps_per_second": 2.407, "step": 10000 }, { "epoch": 2.82, "learning_rate": 5.99905078310394e-07, "loss": 0.3378, "step": 10010 }, { "epoch": 2.82, "learning_rate": 5.904129093497864e-07, "loss": 0.3378, "step": 10020 }, { "epoch": 2.83, "learning_rate": 5.809207403891789e-07, "loss": 0.1693, "step": 10030 }, { "epoch": 2.83, "learning_rate": 5.714285714285715e-07, "loss": 0.3012, "step": 10040 }, { "epoch": 2.83, "learning_rate": 5.61936402467964e-07, "loss": 0.2447, "step": 10050 }, { "epoch": 2.84, "learning_rate": 5.524442335073565e-07, "loss": 0.2869, "step": 10060 }, { "epoch": 2.84, "learning_rate": 5.42952064546749e-07, "loss": 0.3656, "step": 10070 }, { "epoch": 2.84, "learning_rate": 5.334598955861415e-07, "loss": 0.1434, "step": 10080 }, { "epoch": 2.84, "learning_rate": 5.239677266255339e-07, "loss": 0.1777, "step": 10090 }, { "epoch": 2.85, "learning_rate": 5.144755576649265e-07, "loss": 0.5208, "step": 10100 }, { "epoch": 2.85, "learning_rate": 5.04983388704319e-07, "loss": 0.5794, "step": 10110 }, { "epoch": 2.85, "learning_rate": 4.954912197437114e-07, "loss": 0.2979, "step": 10120 }, { "epoch": 2.86, "learning_rate": 4.859990507831039e-07, "loss": 0.2351, "step": 10130 }, { "epoch": 2.86, "learning_rate": 4.7650688182249645e-07, "loss": 0.2441, "step": 10140 }, { "epoch": 2.86, "learning_rate": 4.67014712861889e-07, "loss": 0.3115, "step": 10150 }, { "epoch": 2.86, "learning_rate": 4.575225439012815e-07, "loss": 0.2808, "step": 10160 }, { "epoch": 2.87, "learning_rate": 4.48030374940674e-07, "loss": 0.2838, "step": 10170 }, { "epoch": 2.87, "learning_rate": 4.385382059800665e-07, "loss": 0.2347, "step": 10180 }, { "epoch": 2.87, "learning_rate": 4.29046037019459e-07, "loss": 0.4912, "step": 10190 }, { "epoch": 2.88, "learning_rate": 4.195538680588515e-07, "loss": 0.2631, "step": 10200 }, { "epoch": 2.88, "learning_rate": 4.1006169909824394e-07, "loss": 0.2965, "step": 10210 }, { "epoch": 2.88, "learning_rate": 4.0056953013763643e-07, "loss": 0.3917, "step": 10220 }, { "epoch": 2.88, "learning_rate": 3.91077361177029e-07, "loss": 0.5057, "step": 10230 }, { "epoch": 2.89, "learning_rate": 3.8158519221642146e-07, "loss": 0.4255, "step": 10240 }, { "epoch": 2.89, "learning_rate": 3.7209302325581396e-07, "loss": 0.2203, "step": 10250 }, { "epoch": 2.89, "learning_rate": 3.626008542952065e-07, "loss": 0.4773, "step": 10260 }, { "epoch": 2.9, "learning_rate": 3.53108685334599e-07, "loss": 0.2554, "step": 10270 }, { "epoch": 2.9, "learning_rate": 3.436165163739915e-07, "loss": 0.389, "step": 10280 }, { "epoch": 2.9, "learning_rate": 3.34124347413384e-07, "loss": 0.2402, "step": 10290 }, { "epoch": 2.9, "learning_rate": 3.246321784527765e-07, "loss": 0.447, "step": 10300 }, { "epoch": 2.91, "learning_rate": 3.1514000949216895e-07, "loss": 0.3556, "step": 10310 }, { "epoch": 2.91, "learning_rate": 3.056478405315615e-07, "loss": 0.3357, "step": 10320 }, { "epoch": 2.91, "learning_rate": 2.96155671570954e-07, "loss": 0.5372, "step": 10330 }, { "epoch": 2.92, "learning_rate": 2.866635026103465e-07, "loss": 0.3488, "step": 10340 }, { "epoch": 2.92, "learning_rate": 2.7717133364973897e-07, "loss": 0.4506, "step": 10350 }, { "epoch": 2.92, "learning_rate": 2.676791646891315e-07, "loss": 0.3523, "step": 10360 }, { "epoch": 2.92, "learning_rate": 2.58186995728524e-07, "loss": 0.3725, "step": 10370 }, { "epoch": 2.93, "learning_rate": 2.486948267679165e-07, "loss": 0.5271, "step": 10380 }, { "epoch": 2.93, "learning_rate": 2.39202657807309e-07, "loss": 0.2397, "step": 10390 }, { "epoch": 2.93, "learning_rate": 2.297104888467015e-07, "loss": 0.1559, "step": 10400 }, { "epoch": 2.93, "learning_rate": 2.20218319886094e-07, "loss": 0.4188, "step": 10410 }, { "epoch": 2.94, "learning_rate": 2.107261509254865e-07, "loss": 0.5267, "step": 10420 }, { "epoch": 2.94, "learning_rate": 2.0123398196487897e-07, "loss": 0.5628, "step": 10430 }, { "epoch": 2.94, "learning_rate": 1.917418130042715e-07, "loss": 0.4105, "step": 10440 }, { "epoch": 2.95, "learning_rate": 1.82249644043664e-07, "loss": 0.2813, "step": 10450 }, { "epoch": 2.95, "learning_rate": 1.727574750830565e-07, "loss": 0.3071, "step": 10460 }, { "epoch": 2.95, "learning_rate": 1.6326530612244901e-07, "loss": 0.3369, "step": 10470 }, { "epoch": 2.95, "learning_rate": 1.537731371618415e-07, "loss": 0.1922, "step": 10480 }, { "epoch": 2.96, "learning_rate": 1.44280968201234e-07, "loss": 0.461, "step": 10490 }, { "epoch": 2.96, "learning_rate": 1.3478879924062649e-07, "loss": 0.3687, "step": 10500 }, { "epoch": 2.96, "eval_loss": 0.34634825587272644, "eval_runtime": 209.8305, "eval_samples_per_second": 9.632, "eval_steps_per_second": 2.411, "step": 10500 }, { "epoch": 2.96, "learning_rate": 4.115502904658845e-06, "loss": 0.4669, "step": 10510 }, { "epoch": 2.97, "learning_rate": 4.109807495158902e-06, "loss": 0.3283, "step": 10520 }, { "epoch": 2.97, "learning_rate": 4.104112085658959e-06, "loss": 0.2859, "step": 10530 }, { "epoch": 2.97, "learning_rate": 4.098416676159016e-06, "loss": 0.2623, "step": 10540 }, { "epoch": 2.97, "learning_rate": 4.0927212666590734e-06, "loss": 0.3267, "step": 10550 }, { "epoch": 2.98, "learning_rate": 4.08702585715913e-06, "loss": 0.278, "step": 10560 }, { "epoch": 2.98, "learning_rate": 4.081330447659187e-06, "loss": 0.487, "step": 10570 }, { "epoch": 2.98, "learning_rate": 4.075635038159244e-06, "loss": 0.3299, "step": 10580 }, { "epoch": 2.99, "learning_rate": 4.069939628659301e-06, "loss": 0.4466, "step": 10590 }, { "epoch": 2.99, "learning_rate": 4.064244219159358e-06, "loss": 0.2438, "step": 10600 }, { "epoch": 2.99, "learning_rate": 4.058548809659415e-06, "loss": 0.6179, "step": 10610 }, { "epoch": 2.99, "learning_rate": 4.052853400159472e-06, "loss": 0.3405, "step": 10620 }, { "epoch": 3.0, "learning_rate": 4.047157990659529e-06, "loss": 0.344, "step": 10630 }, { "epoch": 3.0, "learning_rate": 4.041462581159586e-06, "loss": 0.3586, "step": 10640 }, { "epoch": 3.0, "learning_rate": 4.035767171659642e-06, "loss": 0.1847, "step": 10650 }, { "epoch": 3.01, "learning_rate": 4.0300717621597e-06, "loss": 0.3832, "step": 10660 }, { "epoch": 3.01, "learning_rate": 4.024376352659757e-06, "loss": 0.2131, "step": 10670 }, { "epoch": 3.01, "learning_rate": 4.018680943159813e-06, "loss": 0.3245, "step": 10680 }, { "epoch": 3.01, "learning_rate": 4.01298553365987e-06, "loss": 0.5644, "step": 10690 }, { "epoch": 3.02, "learning_rate": 4.007290124159927e-06, "loss": 0.1613, "step": 10700 }, { "epoch": 3.02, "learning_rate": 4.001594714659984e-06, "loss": 0.3756, "step": 10710 }, { "epoch": 3.02, "learning_rate": 3.9958993051600414e-06, "loss": 0.1069, "step": 10720 }, { "epoch": 3.03, "learning_rate": 3.990203895660098e-06, "loss": 0.4927, "step": 10730 }, { "epoch": 3.03, "learning_rate": 3.9845084861601555e-06, "loss": 0.386, "step": 10740 }, { "epoch": 3.03, "learning_rate": 3.9788130766602126e-06, "loss": 0.3843, "step": 10750 }, { "epoch": 3.03, "learning_rate": 3.973117667160269e-06, "loss": 0.2719, "step": 10760 }, { "epoch": 3.04, "learning_rate": 3.967422257660326e-06, "loss": 0.4034, "step": 10770 }, { "epoch": 3.04, "learning_rate": 3.961726848160383e-06, "loss": 0.3937, "step": 10780 }, { "epoch": 3.04, "learning_rate": 3.95603143866044e-06, "loss": 0.4995, "step": 10790 }, { "epoch": 3.04, "learning_rate": 3.950336029160497e-06, "loss": 0.2434, "step": 10800 }, { "epoch": 3.05, "learning_rate": 3.944640619660554e-06, "loss": 0.3839, "step": 10810 }, { "epoch": 3.05, "learning_rate": 3.938945210160611e-06, "loss": 0.3355, "step": 10820 }, { "epoch": 3.05, "learning_rate": 3.933249800660668e-06, "loss": 0.2876, "step": 10830 }, { "epoch": 3.06, "learning_rate": 3.927554391160725e-06, "loss": 0.4364, "step": 10840 }, { "epoch": 3.06, "learning_rate": 3.921858981660781e-06, "loss": 0.2628, "step": 10850 }, { "epoch": 3.06, "learning_rate": 3.916163572160839e-06, "loss": 0.3119, "step": 10860 }, { "epoch": 3.06, "learning_rate": 3.910468162660896e-06, "loss": 0.1733, "step": 10870 }, { "epoch": 3.07, "learning_rate": 3.904772753160952e-06, "loss": 0.2827, "step": 10880 }, { "epoch": 3.07, "learning_rate": 3.899077343661009e-06, "loss": 0.1766, "step": 10890 }, { "epoch": 3.07, "learning_rate": 3.8933819341610665e-06, "loss": 0.2672, "step": 10900 }, { "epoch": 3.08, "learning_rate": 3.8876865246611235e-06, "loss": 0.3424, "step": 10910 }, { "epoch": 3.08, "learning_rate": 3.8819911151611805e-06, "loss": 0.4487, "step": 10920 }, { "epoch": 3.08, "learning_rate": 3.876295705661237e-06, "loss": 0.4172, "step": 10930 }, { "epoch": 3.08, "learning_rate": 3.870600296161295e-06, "loss": 0.3854, "step": 10940 }, { "epoch": 3.09, "learning_rate": 3.864904886661352e-06, "loss": 0.3686, "step": 10950 }, { "epoch": 3.09, "learning_rate": 3.859209477161408e-06, "loss": 0.2533, "step": 10960 }, { "epoch": 3.09, "learning_rate": 3.853514067661465e-06, "loss": 0.1369, "step": 10970 }, { "epoch": 3.1, "learning_rate": 3.847818658161522e-06, "loss": 0.4269, "step": 10980 }, { "epoch": 3.1, "learning_rate": 3.842123248661579e-06, "loss": 0.2595, "step": 10990 }, { "epoch": 3.1, "learning_rate": 3.836427839161636e-06, "loss": 0.2355, "step": 11000 }, { "epoch": 3.1, "eval_loss": 0.34977805614471436, "eval_runtime": 212.3154, "eval_samples_per_second": 9.519, "eval_steps_per_second": 2.383, "step": 11000 }, { "epoch": 3.1, "learning_rate": 3.830732429661693e-06, "loss": 0.3466, "step": 11010 }, { "epoch": 3.11, "learning_rate": 3.82503702016175e-06, "loss": 0.1506, "step": 11020 }, { "epoch": 3.11, "learning_rate": 3.819341610661807e-06, "loss": 0.2931, "step": 11030 }, { "epoch": 3.11, "learning_rate": 3.813646201161864e-06, "loss": 0.5066, "step": 11040 }, { "epoch": 3.12, "learning_rate": 3.807950791661921e-06, "loss": 0.3229, "step": 11050 }, { "epoch": 3.12, "learning_rate": 3.802255382161978e-06, "loss": 0.2496, "step": 11060 }, { "epoch": 3.12, "learning_rate": 3.7965599726620344e-06, "loss": 0.4409, "step": 11070 }, { "epoch": 3.12, "learning_rate": 3.7908645631620915e-06, "loss": 0.3241, "step": 11080 }, { "epoch": 3.13, "learning_rate": 3.785169153662149e-06, "loss": 0.2519, "step": 11090 }, { "epoch": 3.13, "learning_rate": 3.7794737441622056e-06, "loss": 0.4325, "step": 11100 }, { "epoch": 3.13, "learning_rate": 3.7737783346622626e-06, "loss": 0.3483, "step": 11110 }, { "epoch": 3.14, "learning_rate": 3.7680829251623196e-06, "loss": 0.3515, "step": 11120 }, { "epoch": 3.14, "learning_rate": 3.7623875156623763e-06, "loss": 0.4152, "step": 11130 }, { "epoch": 3.14, "learning_rate": 3.7566921061624333e-06, "loss": 0.3937, "step": 11140 }, { "epoch": 3.14, "learning_rate": 3.7509966966624903e-06, "loss": 0.4958, "step": 11150 }, { "epoch": 3.15, "learning_rate": 3.745301287162547e-06, "loss": 0.3978, "step": 11160 }, { "epoch": 3.15, "learning_rate": 3.7396058776626044e-06, "loss": 0.3547, "step": 11170 }, { "epoch": 3.15, "learning_rate": 3.7339104681626615e-06, "loss": 0.1469, "step": 11180 }, { "epoch": 3.15, "learning_rate": 3.728215058662718e-06, "loss": 0.4382, "step": 11190 }, { "epoch": 3.16, "learning_rate": 3.722519649162775e-06, "loss": 0.234, "step": 11200 }, { "epoch": 3.16, "learning_rate": 3.7168242396628317e-06, "loss": 0.3652, "step": 11210 }, { "epoch": 3.16, "learning_rate": 3.7111288301628888e-06, "loss": 0.2035, "step": 11220 }, { "epoch": 3.17, "learning_rate": 3.7054334206629462e-06, "loss": 0.1942, "step": 11230 }, { "epoch": 3.17, "learning_rate": 3.699738011163003e-06, "loss": 0.6065, "step": 11240 }, { "epoch": 3.17, "learning_rate": 3.69404260166306e-06, "loss": 0.3618, "step": 11250 }, { "epoch": 3.17, "learning_rate": 3.688347192163117e-06, "loss": 0.3662, "step": 11260 }, { "epoch": 3.18, "learning_rate": 3.6826517826631736e-06, "loss": 0.2511, "step": 11270 }, { "epoch": 3.18, "learning_rate": 3.6769563731632306e-06, "loss": 0.3071, "step": 11280 }, { "epoch": 3.18, "learning_rate": 3.671260963663288e-06, "loss": 0.4005, "step": 11290 }, { "epoch": 3.19, "learning_rate": 3.6655655541633447e-06, "loss": 0.3301, "step": 11300 }, { "epoch": 3.19, "learning_rate": 3.6598701446634017e-06, "loss": 0.3304, "step": 11310 }, { "epoch": 3.19, "learning_rate": 3.6541747351634588e-06, "loss": 0.4917, "step": 11320 }, { "epoch": 3.19, "learning_rate": 3.6484793256635154e-06, "loss": 0.4638, "step": 11330 }, { "epoch": 3.2, "learning_rate": 3.6427839161635724e-06, "loss": 0.3115, "step": 11340 }, { "epoch": 3.2, "learning_rate": 3.63708850666363e-06, "loss": 0.322, "step": 11350 }, { "epoch": 3.2, "learning_rate": 3.631393097163686e-06, "loss": 0.365, "step": 11360 }, { "epoch": 3.21, "learning_rate": 3.6256976876637435e-06, "loss": 0.361, "step": 11370 }, { "epoch": 3.21, "learning_rate": 3.6200022781638e-06, "loss": 0.3455, "step": 11380 }, { "epoch": 3.21, "learning_rate": 3.614306868663857e-06, "loss": 0.3239, "step": 11390 }, { "epoch": 3.21, "learning_rate": 3.6086114591639142e-06, "loss": 0.229, "step": 11400 }, { "epoch": 3.22, "learning_rate": 3.602916049663971e-06, "loss": 0.3363, "step": 11410 }, { "epoch": 3.22, "learning_rate": 3.597220640164028e-06, "loss": 0.3821, "step": 11420 }, { "epoch": 3.22, "learning_rate": 3.5915252306640854e-06, "loss": 0.5456, "step": 11430 }, { "epoch": 3.23, "learning_rate": 3.585829821164142e-06, "loss": 0.3119, "step": 11440 }, { "epoch": 3.23, "learning_rate": 3.580134411664199e-06, "loss": 0.1574, "step": 11450 }, { "epoch": 3.23, "learning_rate": 3.574439002164256e-06, "loss": 0.4289, "step": 11460 }, { "epoch": 3.23, "learning_rate": 3.5687435926643127e-06, "loss": 0.5156, "step": 11470 }, { "epoch": 3.24, "learning_rate": 3.5630481831643697e-06, "loss": 0.2907, "step": 11480 }, { "epoch": 3.24, "learning_rate": 3.557352773664427e-06, "loss": 0.4394, "step": 11490 }, { "epoch": 3.24, "learning_rate": 3.5516573641644838e-06, "loss": 0.4248, "step": 11500 }, { "epoch": 3.24, "eval_loss": 0.3501090109348297, "eval_runtime": 212.2293, "eval_samples_per_second": 9.523, "eval_steps_per_second": 2.384, "step": 11500 }, { "epoch": 3.24, "learning_rate": 3.545961954664541e-06, "loss": 0.3417, "step": 11510 }, { "epoch": 3.25, "learning_rate": 3.540266545164598e-06, "loss": 0.4421, "step": 11520 }, { "epoch": 3.25, "learning_rate": 3.5345711356646545e-06, "loss": 0.0987, "step": 11530 }, { "epoch": 3.25, "learning_rate": 3.5288757261647115e-06, "loss": 0.2601, "step": 11540 }, { "epoch": 3.26, "learning_rate": 3.523180316664768e-06, "loss": 0.5805, "step": 11550 }, { "epoch": 3.26, "learning_rate": 3.517484907164825e-06, "loss": 0.3845, "step": 11560 }, { "epoch": 3.26, "learning_rate": 3.5117894976648826e-06, "loss": 0.5719, "step": 11570 }, { "epoch": 3.26, "learning_rate": 3.5060940881649393e-06, "loss": 0.2547, "step": 11580 }, { "epoch": 3.27, "learning_rate": 3.5003986786649963e-06, "loss": 0.4258, "step": 11590 }, { "epoch": 3.27, "learning_rate": 3.4947032691650533e-06, "loss": 0.3509, "step": 11600 }, { "epoch": 3.27, "learning_rate": 3.48900785966511e-06, "loss": 0.4785, "step": 11610 }, { "epoch": 3.28, "learning_rate": 3.483312450165167e-06, "loss": 0.3351, "step": 11620 }, { "epoch": 3.28, "learning_rate": 3.4776170406652245e-06, "loss": 0.319, "step": 11630 }, { "epoch": 3.28, "learning_rate": 3.471921631165281e-06, "loss": 0.5243, "step": 11640 }, { "epoch": 3.28, "learning_rate": 3.466226221665338e-06, "loss": 0.3207, "step": 11650 }, { "epoch": 3.29, "learning_rate": 3.460530812165395e-06, "loss": 0.3911, "step": 11660 }, { "epoch": 3.29, "learning_rate": 3.4548354026654518e-06, "loss": 0.4672, "step": 11670 }, { "epoch": 3.29, "learning_rate": 3.449139993165509e-06, "loss": 0.3416, "step": 11680 }, { "epoch": 3.3, "learning_rate": 3.4434445836655663e-06, "loss": 0.1996, "step": 11690 }, { "epoch": 3.3, "learning_rate": 3.437749174165623e-06, "loss": 0.2258, "step": 11700 }, { "epoch": 3.3, "learning_rate": 3.43205376466568e-06, "loss": 0.6087, "step": 11710 }, { "epoch": 3.3, "learning_rate": 3.4263583551657366e-06, "loss": 0.5061, "step": 11720 }, { "epoch": 3.31, "learning_rate": 3.4206629456657936e-06, "loss": 0.318, "step": 11730 }, { "epoch": 3.31, "learning_rate": 3.4149675361658506e-06, "loss": 0.2787, "step": 11740 }, { "epoch": 3.31, "learning_rate": 3.4092721266659073e-06, "loss": 0.2786, "step": 11750 }, { "epoch": 3.32, "learning_rate": 3.4035767171659643e-06, "loss": 0.3459, "step": 11760 }, { "epoch": 3.32, "learning_rate": 3.3978813076660218e-06, "loss": 0.3916, "step": 11770 }, { "epoch": 3.32, "learning_rate": 3.3921858981660784e-06, "loss": 0.485, "step": 11780 }, { "epoch": 3.32, "learning_rate": 3.3864904886661354e-06, "loss": 0.1479, "step": 11790 }, { "epoch": 3.33, "learning_rate": 3.3807950791661925e-06, "loss": 0.2049, "step": 11800 }, { "epoch": 3.33, "learning_rate": 3.375099669666249e-06, "loss": 0.2876, "step": 11810 }, { "epoch": 3.33, "learning_rate": 3.369404260166306e-06, "loss": 0.4562, "step": 11820 }, { "epoch": 3.34, "learning_rate": 3.3637088506663636e-06, "loss": 0.3319, "step": 11830 }, { "epoch": 3.34, "learning_rate": 3.35801344116642e-06, "loss": 0.442, "step": 11840 }, { "epoch": 3.34, "learning_rate": 3.3523180316664772e-06, "loss": 0.3675, "step": 11850 }, { "epoch": 3.34, "learning_rate": 3.346622622166534e-06, "loss": 0.3927, "step": 11860 }, { "epoch": 3.35, "learning_rate": 3.340927212666591e-06, "loss": 0.6269, "step": 11870 }, { "epoch": 3.35, "learning_rate": 3.335231803166648e-06, "loss": 0.1756, "step": 11880 }, { "epoch": 3.35, "learning_rate": 3.3295363936667045e-06, "loss": 0.4247, "step": 11890 }, { "epoch": 3.35, "learning_rate": 3.323840984166762e-06, "loss": 0.3975, "step": 11900 }, { "epoch": 3.36, "learning_rate": 3.318145574666819e-06, "loss": 0.4755, "step": 11910 }, { "epoch": 3.36, "learning_rate": 3.3124501651668757e-06, "loss": 0.379, "step": 11920 }, { "epoch": 3.36, "learning_rate": 3.3067547556669327e-06, "loss": 0.3689, "step": 11930 }, { "epoch": 3.37, "learning_rate": 3.3010593461669897e-06, "loss": 0.2979, "step": 11940 }, { "epoch": 3.37, "learning_rate": 3.2953639366670464e-06, "loss": 0.455, "step": 11950 }, { "epoch": 3.37, "learning_rate": 3.2896685271671034e-06, "loss": 0.4162, "step": 11960 }, { "epoch": 3.37, "learning_rate": 3.283973117667161e-06, "loss": 0.4755, "step": 11970 }, { "epoch": 3.38, "learning_rate": 3.2782777081672175e-06, "loss": 0.3042, "step": 11980 }, { "epoch": 3.38, "learning_rate": 3.2725822986672745e-06, "loss": 0.286, "step": 11990 }, { "epoch": 3.38, "learning_rate": 3.2668868891673316e-06, "loss": 0.4205, "step": 12000 }, { "epoch": 3.38, "eval_loss": 0.3515862822532654, "eval_runtime": 210.7037, "eval_samples_per_second": 9.592, "eval_steps_per_second": 2.401, "step": 12000 }, { "epoch": 3.39, "learning_rate": 3.261191479667388e-06, "loss": 0.4638, "step": 12010 }, { "epoch": 3.39, "learning_rate": 3.2554960701674452e-06, "loss": 0.4955, "step": 12020 }, { "epoch": 3.39, "learning_rate": 3.249800660667502e-06, "loss": 0.3171, "step": 12030 }, { "epoch": 3.39, "learning_rate": 3.2441052511675593e-06, "loss": 0.3157, "step": 12040 }, { "epoch": 3.4, "learning_rate": 3.2384098416676163e-06, "loss": 0.4029, "step": 12050 }, { "epoch": 3.4, "learning_rate": 3.232714432167673e-06, "loss": 0.6141, "step": 12060 }, { "epoch": 3.4, "learning_rate": 3.22701902266773e-06, "loss": 0.5261, "step": 12070 }, { "epoch": 3.41, "learning_rate": 3.221323613167787e-06, "loss": 0.4287, "step": 12080 }, { "epoch": 3.41, "learning_rate": 3.2156282036678437e-06, "loss": 0.3101, "step": 12090 }, { "epoch": 3.41, "learning_rate": 3.209932794167901e-06, "loss": 0.4954, "step": 12100 }, { "epoch": 3.41, "learning_rate": 3.204237384667958e-06, "loss": 0.4279, "step": 12110 }, { "epoch": 3.42, "learning_rate": 3.1985419751680148e-06, "loss": 0.1931, "step": 12120 }, { "epoch": 3.42, "learning_rate": 3.192846565668072e-06, "loss": 0.4763, "step": 12130 }, { "epoch": 3.42, "learning_rate": 3.187151156168129e-06, "loss": 0.1875, "step": 12140 }, { "epoch": 3.43, "learning_rate": 3.1814557466681855e-06, "loss": 0.1963, "step": 12150 }, { "epoch": 3.43, "learning_rate": 3.175760337168243e-06, "loss": 0.3605, "step": 12160 }, { "epoch": 3.43, "learning_rate": 3.1700649276683e-06, "loss": 0.2117, "step": 12170 }, { "epoch": 3.43, "learning_rate": 3.1643695181683566e-06, "loss": 0.4029, "step": 12180 }, { "epoch": 3.44, "learning_rate": 3.1586741086684136e-06, "loss": 0.3201, "step": 12190 }, { "epoch": 3.44, "learning_rate": 3.1529786991684702e-06, "loss": 0.4943, "step": 12200 }, { "epoch": 3.44, "learning_rate": 3.1472832896685273e-06, "loss": 0.459, "step": 12210 }, { "epoch": 3.45, "learning_rate": 3.1415878801685843e-06, "loss": 0.1145, "step": 12220 }, { "epoch": 3.45, "learning_rate": 3.135892470668641e-06, "loss": 0.3262, "step": 12230 }, { "epoch": 3.45, "learning_rate": 3.1301970611686984e-06, "loss": 0.3552, "step": 12240 }, { "epoch": 3.45, "learning_rate": 3.1245016516687554e-06, "loss": 0.3953, "step": 12250 }, { "epoch": 3.46, "learning_rate": 3.118806242168812e-06, "loss": 0.2188, "step": 12260 }, { "epoch": 3.46, "learning_rate": 3.113110832668869e-06, "loss": 0.2279, "step": 12270 }, { "epoch": 3.46, "learning_rate": 3.107415423168926e-06, "loss": 0.398, "step": 12280 }, { "epoch": 3.46, "learning_rate": 3.1017200136689828e-06, "loss": 0.4347, "step": 12290 }, { "epoch": 3.47, "learning_rate": 3.0960246041690402e-06, "loss": 0.232, "step": 12300 }, { "epoch": 3.47, "learning_rate": 3.0903291946690973e-06, "loss": 0.2781, "step": 12310 }, { "epoch": 3.47, "learning_rate": 3.084633785169154e-06, "loss": 0.3938, "step": 12320 }, { "epoch": 3.48, "learning_rate": 3.078938375669211e-06, "loss": 0.5736, "step": 12330 }, { "epoch": 3.48, "learning_rate": 3.073242966169268e-06, "loss": 0.327, "step": 12340 }, { "epoch": 3.48, "learning_rate": 3.0675475566693246e-06, "loss": 0.2571, "step": 12350 }, { "epoch": 3.48, "learning_rate": 3.061852147169382e-06, "loss": 0.3868, "step": 12360 }, { "epoch": 3.49, "learning_rate": 3.0561567376694382e-06, "loss": 0.3174, "step": 12370 }, { "epoch": 3.49, "learning_rate": 3.0504613281694957e-06, "loss": 0.2424, "step": 12380 }, { "epoch": 3.49, "learning_rate": 3.0447659186695527e-06, "loss": 0.2545, "step": 12390 }, { "epoch": 3.5, "learning_rate": 3.0390705091696094e-06, "loss": 0.4465, "step": 12400 }, { "epoch": 3.5, "learning_rate": 3.0333750996696664e-06, "loss": 0.304, "step": 12410 }, { "epoch": 3.5, "learning_rate": 3.0276796901697234e-06, "loss": 0.3063, "step": 12420 }, { "epoch": 3.5, "learning_rate": 3.02198428066978e-06, "loss": 0.3685, "step": 12430 }, { "epoch": 3.51, "learning_rate": 3.0162888711698375e-06, "loss": 0.4217, "step": 12440 }, { "epoch": 3.51, "learning_rate": 3.0105934616698946e-06, "loss": 0.3801, "step": 12450 }, { "epoch": 3.51, "learning_rate": 3.004898052169951e-06, "loss": 0.2402, "step": 12460 }, { "epoch": 3.52, "learning_rate": 2.9992026426700082e-06, "loss": 0.3375, "step": 12470 }, { "epoch": 3.52, "learning_rate": 2.9935072331700653e-06, "loss": 0.4198, "step": 12480 }, { "epoch": 3.52, "learning_rate": 2.987811823670122e-06, "loss": 0.4481, "step": 12490 }, { "epoch": 3.52, "learning_rate": 2.9821164141701793e-06, "loss": 0.1827, "step": 12500 }, { "epoch": 3.52, "eval_loss": 0.3478640913963318, "eval_runtime": 211.175, "eval_samples_per_second": 9.57, "eval_steps_per_second": 2.396, "step": 12500 }, { "epoch": 3.53, "learning_rate": 2.976421004670236e-06, "loss": 0.2672, "step": 12510 }, { "epoch": 3.53, "learning_rate": 2.970725595170293e-06, "loss": 0.3767, "step": 12520 }, { "epoch": 3.53, "learning_rate": 2.96503018567035e-06, "loss": 0.356, "step": 12530 }, { "epoch": 3.54, "learning_rate": 2.9593347761704066e-06, "loss": 0.2137, "step": 12540 }, { "epoch": 3.54, "learning_rate": 2.9536393666704637e-06, "loss": 0.156, "step": 12550 }, { "epoch": 3.54, "learning_rate": 2.947943957170521e-06, "loss": 0.2804, "step": 12560 }, { "epoch": 3.54, "learning_rate": 2.9422485476705773e-06, "loss": 0.346, "step": 12570 }, { "epoch": 3.55, "learning_rate": 2.936553138170635e-06, "loss": 0.3009, "step": 12580 }, { "epoch": 3.55, "learning_rate": 2.930857728670692e-06, "loss": 0.3055, "step": 12590 }, { "epoch": 3.55, "learning_rate": 2.9251623191707485e-06, "loss": 0.4737, "step": 12600 }, { "epoch": 3.56, "learning_rate": 2.9194669096708055e-06, "loss": 0.214, "step": 12610 }, { "epoch": 3.56, "learning_rate": 2.9137715001708625e-06, "loss": 0.3206, "step": 12620 }, { "epoch": 3.56, "learning_rate": 2.908076090670919e-06, "loss": 0.5329, "step": 12630 }, { "epoch": 3.56, "learning_rate": 2.9023806811709766e-06, "loss": 0.1631, "step": 12640 }, { "epoch": 3.57, "learning_rate": 2.8966852716710337e-06, "loss": 0.3896, "step": 12650 }, { "epoch": 3.57, "learning_rate": 2.8909898621710903e-06, "loss": 0.7929, "step": 12660 }, { "epoch": 3.57, "learning_rate": 2.8852944526711473e-06, "loss": 0.3773, "step": 12670 }, { "epoch": 3.57, "learning_rate": 2.879599043171204e-06, "loss": 0.2049, "step": 12680 }, { "epoch": 3.58, "learning_rate": 2.873903633671261e-06, "loss": 0.3991, "step": 12690 }, { "epoch": 3.58, "learning_rate": 2.8682082241713184e-06, "loss": 0.3699, "step": 12700 }, { "epoch": 3.58, "learning_rate": 2.862512814671375e-06, "loss": 0.5049, "step": 12710 }, { "epoch": 3.59, "learning_rate": 2.856817405171432e-06, "loss": 0.727, "step": 12720 }, { "epoch": 3.59, "learning_rate": 2.851121995671489e-06, "loss": 0.4813, "step": 12730 }, { "epoch": 3.59, "learning_rate": 2.8454265861715458e-06, "loss": 0.2857, "step": 12740 }, { "epoch": 3.59, "learning_rate": 2.839731176671603e-06, "loss": 0.3309, "step": 12750 }, { "epoch": 3.6, "learning_rate": 2.8340357671716603e-06, "loss": 0.5852, "step": 12760 }, { "epoch": 3.6, "learning_rate": 2.8283403576717165e-06, "loss": 0.5285, "step": 12770 }, { "epoch": 3.6, "learning_rate": 2.822644948171774e-06, "loss": 0.3123, "step": 12780 }, { "epoch": 3.61, "learning_rate": 2.816949538671831e-06, "loss": 0.3655, "step": 12790 }, { "epoch": 3.61, "learning_rate": 2.8112541291718876e-06, "loss": 0.227, "step": 12800 }, { "epoch": 3.61, "learning_rate": 2.8055587196719446e-06, "loss": 0.188, "step": 12810 }, { "epoch": 3.61, "learning_rate": 2.7998633101720017e-06, "loss": 0.4075, "step": 12820 }, { "epoch": 3.62, "learning_rate": 2.7941679006720583e-06, "loss": 0.3146, "step": 12830 }, { "epoch": 3.62, "learning_rate": 2.7884724911721157e-06, "loss": 0.2311, "step": 12840 }, { "epoch": 3.62, "learning_rate": 2.7827770816721724e-06, "loss": 0.3325, "step": 12850 }, { "epoch": 3.63, "learning_rate": 2.7770816721722294e-06, "loss": 0.179, "step": 12860 }, { "epoch": 3.63, "learning_rate": 2.7713862626722864e-06, "loss": 0.3413, "step": 12870 }, { "epoch": 3.63, "learning_rate": 2.765690853172343e-06, "loss": 0.4583, "step": 12880 }, { "epoch": 3.63, "learning_rate": 2.7599954436724e-06, "loss": 0.4093, "step": 12890 }, { "epoch": 3.64, "learning_rate": 2.7543000341724576e-06, "loss": 0.321, "step": 12900 }, { "epoch": 3.64, "learning_rate": 2.748604624672514e-06, "loss": 0.2316, "step": 12910 }, { "epoch": 3.64, "learning_rate": 2.742909215172571e-06, "loss": 0.2259, "step": 12920 }, { "epoch": 3.65, "learning_rate": 2.7372138056726283e-06, "loss": 0.4776, "step": 12930 }, { "epoch": 3.65, "learning_rate": 2.731518396172685e-06, "loss": 0.1712, "step": 12940 }, { "epoch": 3.65, "learning_rate": 2.725822986672742e-06, "loss": 0.3603, "step": 12950 }, { "epoch": 3.65, "learning_rate": 2.7201275771727994e-06, "loss": 0.4242, "step": 12960 }, { "epoch": 3.66, "learning_rate": 2.714432167672856e-06, "loss": 0.3223, "step": 12970 }, { "epoch": 3.66, "learning_rate": 2.708736758172913e-06, "loss": 0.3738, "step": 12980 }, { "epoch": 3.66, "learning_rate": 2.70304134867297e-06, "loss": 0.1973, "step": 12990 }, { "epoch": 3.67, "learning_rate": 2.6973459391730267e-06, "loss": 0.3688, "step": 13000 }, { "epoch": 3.67, "eval_loss": 0.34860894083976746, "eval_runtime": 211.4299, "eval_samples_per_second": 9.559, "eval_steps_per_second": 2.393, "step": 13000 }, { "epoch": 3.67, "learning_rate": 2.6916505296730837e-06, "loss": 0.5827, "step": 13010 }, { "epoch": 3.67, "learning_rate": 2.6859551201731403e-06, "loss": 0.4093, "step": 13020 }, { "epoch": 3.67, "learning_rate": 2.6802597106731974e-06, "loss": 0.2026, "step": 13030 }, { "epoch": 3.68, "learning_rate": 2.674564301173255e-06, "loss": 0.3811, "step": 13040 }, { "epoch": 3.68, "learning_rate": 2.6688688916733115e-06, "loss": 0.4959, "step": 13050 }, { "epoch": 3.68, "learning_rate": 2.6631734821733685e-06, "loss": 0.2947, "step": 13060 }, { "epoch": 3.68, "learning_rate": 2.6574780726734255e-06, "loss": 0.3327, "step": 13070 }, { "epoch": 3.69, "learning_rate": 2.651782663173482e-06, "loss": 0.4354, "step": 13080 }, { "epoch": 3.69, "learning_rate": 2.646087253673539e-06, "loss": 0.4678, "step": 13090 }, { "epoch": 3.69, "learning_rate": 2.6403918441735967e-06, "loss": 0.2031, "step": 13100 }, { "epoch": 3.7, "learning_rate": 2.6346964346736533e-06, "loss": 0.4424, "step": 13110 }, { "epoch": 3.7, "learning_rate": 2.6290010251737103e-06, "loss": 0.3075, "step": 13120 }, { "epoch": 3.7, "learning_rate": 2.6233056156737674e-06, "loss": 0.2436, "step": 13130 }, { "epoch": 3.7, "learning_rate": 2.617610206173824e-06, "loss": 0.2671, "step": 13140 }, { "epoch": 3.71, "learning_rate": 2.611914796673881e-06, "loss": 0.2847, "step": 13150 }, { "epoch": 3.71, "learning_rate": 2.6062193871739376e-06, "loss": 0.519, "step": 13160 }, { "epoch": 3.71, "learning_rate": 2.600523977673995e-06, "loss": 0.3778, "step": 13170 }, { "epoch": 3.72, "learning_rate": 2.594828568174052e-06, "loss": 0.3399, "step": 13180 }, { "epoch": 3.72, "learning_rate": 2.5891331586741088e-06, "loss": 0.296, "step": 13190 }, { "epoch": 3.72, "learning_rate": 2.583437749174166e-06, "loss": 0.4316, "step": 13200 }, { "epoch": 3.72, "learning_rate": 2.577742339674223e-06, "loss": 0.3023, "step": 13210 }, { "epoch": 3.73, "learning_rate": 2.5720469301742795e-06, "loss": 0.2552, "step": 13220 }, { "epoch": 3.73, "learning_rate": 2.5663515206743365e-06, "loss": 0.3943, "step": 13230 }, { "epoch": 3.73, "learning_rate": 2.560656111174394e-06, "loss": 0.4276, "step": 13240 }, { "epoch": 3.74, "learning_rate": 2.5549607016744506e-06, "loss": 0.4096, "step": 13250 }, { "epoch": 3.74, "learning_rate": 2.5492652921745076e-06, "loss": 0.3047, "step": 13260 }, { "epoch": 3.74, "learning_rate": 2.5435698826745647e-06, "loss": 0.4829, "step": 13270 }, { "epoch": 3.74, "learning_rate": 2.5378744731746213e-06, "loss": 0.3925, "step": 13280 }, { "epoch": 3.75, "learning_rate": 2.5321790636746783e-06, "loss": 0.2451, "step": 13290 }, { "epoch": 3.75, "learning_rate": 2.5264836541747358e-06, "loss": 0.3625, "step": 13300 }, { "epoch": 3.75, "learning_rate": 2.5207882446747924e-06, "loss": 0.26, "step": 13310 }, { "epoch": 3.76, "learning_rate": 2.5150928351748494e-06, "loss": 0.4886, "step": 13320 }, { "epoch": 3.76, "learning_rate": 2.509397425674906e-06, "loss": 0.3842, "step": 13330 }, { "epoch": 3.76, "learning_rate": 2.503702016174963e-06, "loss": 0.5277, "step": 13340 }, { "epoch": 3.76, "learning_rate": 2.49800660667502e-06, "loss": 0.537, "step": 13350 }, { "epoch": 3.77, "learning_rate": 2.492311197175077e-06, "loss": 0.1046, "step": 13360 }, { "epoch": 3.77, "learning_rate": 2.486615787675134e-06, "loss": 0.3134, "step": 13370 }, { "epoch": 3.77, "learning_rate": 2.480920378175191e-06, "loss": 0.4783, "step": 13380 }, { "epoch": 3.78, "learning_rate": 2.475224968675248e-06, "loss": 0.5872, "step": 13390 }, { "epoch": 3.78, "learning_rate": 2.469529559175305e-06, "loss": 0.3972, "step": 13400 }, { "epoch": 3.78, "learning_rate": 2.463834149675362e-06, "loss": 0.2255, "step": 13410 }, { "epoch": 3.78, "learning_rate": 2.458138740175419e-06, "loss": 0.3646, "step": 13420 }, { "epoch": 3.79, "learning_rate": 2.4524433306754756e-06, "loss": 0.1656, "step": 13430 }, { "epoch": 3.79, "learning_rate": 2.4467479211755326e-06, "loss": 0.5685, "step": 13440 }, { "epoch": 3.79, "learning_rate": 2.4410525116755897e-06, "loss": 0.3274, "step": 13450 }, { "epoch": 3.79, "learning_rate": 2.4353571021756467e-06, "loss": 0.3213, "step": 13460 }, { "epoch": 3.8, "learning_rate": 2.4296616926757038e-06, "loss": 0.2743, "step": 13470 }, { "epoch": 3.8, "learning_rate": 2.4239662831757604e-06, "loss": 0.3187, "step": 13480 }, { "epoch": 3.8, "learning_rate": 2.4182708736758174e-06, "loss": 0.2557, "step": 13490 }, { "epoch": 3.81, "learning_rate": 2.4125754641758745e-06, "loss": 0.1861, "step": 13500 }, { "epoch": 3.81, "eval_loss": 0.3507066071033478, "eval_runtime": 212.1488, "eval_samples_per_second": 9.526, "eval_steps_per_second": 2.385, "step": 13500 }, { "epoch": 3.81, "learning_rate": 2.4068800546759315e-06, "loss": 0.3103, "step": 13510 }, { "epoch": 3.81, "learning_rate": 2.4011846451759885e-06, "loss": 0.4539, "step": 13520 }, { "epoch": 3.81, "learning_rate": 2.395489235676045e-06, "loss": 0.5474, "step": 13530 }, { "epoch": 3.82, "learning_rate": 2.389793826176102e-06, "loss": 0.3454, "step": 13540 }, { "epoch": 3.82, "learning_rate": 2.3840984166761592e-06, "loss": 0.478, "step": 13550 }, { "epoch": 3.82, "learning_rate": 2.3784030071762163e-06, "loss": 0.3736, "step": 13560 }, { "epoch": 3.83, "learning_rate": 2.3727075976762733e-06, "loss": 0.3233, "step": 13570 }, { "epoch": 3.83, "learning_rate": 2.36701218817633e-06, "loss": 0.5434, "step": 13580 }, { "epoch": 3.83, "learning_rate": 2.361316778676387e-06, "loss": 0.2229, "step": 13590 }, { "epoch": 3.83, "learning_rate": 2.355621369176444e-06, "loss": 0.2059, "step": 13600 }, { "epoch": 3.84, "learning_rate": 2.349925959676501e-06, "loss": 0.254, "step": 13610 }, { "epoch": 3.84, "learning_rate": 2.3442305501765577e-06, "loss": 0.3806, "step": 13620 }, { "epoch": 3.84, "learning_rate": 2.3385351406766147e-06, "loss": 0.3837, "step": 13630 }, { "epoch": 3.85, "learning_rate": 2.3328397311766718e-06, "loss": 0.439, "step": 13640 }, { "epoch": 3.85, "learning_rate": 2.327144321676729e-06, "loss": 0.3859, "step": 13650 }, { "epoch": 3.85, "learning_rate": 2.321448912176786e-06, "loss": 0.5599, "step": 13660 }, { "epoch": 3.85, "learning_rate": 2.315753502676843e-06, "loss": 0.3513, "step": 13670 }, { "epoch": 3.86, "learning_rate": 2.3100580931768995e-06, "loss": 0.3253, "step": 13680 }, { "epoch": 3.86, "learning_rate": 2.3043626836769565e-06, "loss": 0.2308, "step": 13690 }, { "epoch": 3.86, "learning_rate": 2.2986672741770136e-06, "loss": 0.2583, "step": 13700 }, { "epoch": 3.87, "learning_rate": 2.2929718646770706e-06, "loss": 0.4509, "step": 13710 }, { "epoch": 3.87, "learning_rate": 2.2872764551771272e-06, "loss": 0.3235, "step": 13720 }, { "epoch": 3.87, "learning_rate": 2.2815810456771843e-06, "loss": 0.3594, "step": 13730 }, { "epoch": 3.87, "learning_rate": 2.2758856361772413e-06, "loss": 0.5218, "step": 13740 }, { "epoch": 3.88, "learning_rate": 2.2701902266772983e-06, "loss": 0.2541, "step": 13750 }, { "epoch": 3.88, "learning_rate": 2.2644948171773554e-06, "loss": 0.3263, "step": 13760 }, { "epoch": 3.88, "learning_rate": 2.2587994076774124e-06, "loss": 0.4093, "step": 13770 }, { "epoch": 3.88, "learning_rate": 2.253103998177469e-06, "loss": 0.3335, "step": 13780 }, { "epoch": 3.89, "learning_rate": 2.247408588677526e-06, "loss": 0.3746, "step": 13790 }, { "epoch": 3.89, "learning_rate": 2.241713179177583e-06, "loss": 0.4138, "step": 13800 }, { "epoch": 3.89, "learning_rate": 2.23601776967764e-06, "loss": 0.4556, "step": 13810 }, { "epoch": 3.9, "learning_rate": 2.2303223601776968e-06, "loss": 0.3204, "step": 13820 }, { "epoch": 3.9, "learning_rate": 2.224626950677754e-06, "loss": 0.2649, "step": 13830 }, { "epoch": 3.9, "learning_rate": 2.218931541177811e-06, "loss": 0.2692, "step": 13840 }, { "epoch": 3.9, "learning_rate": 2.213236131677868e-06, "loss": 0.3215, "step": 13850 }, { "epoch": 3.91, "learning_rate": 2.2075407221779245e-06, "loss": 0.4207, "step": 13860 }, { "epoch": 3.91, "learning_rate": 2.201845312677982e-06, "loss": 0.4033, "step": 13870 }, { "epoch": 3.91, "learning_rate": 2.1961499031780386e-06, "loss": 0.2342, "step": 13880 }, { "epoch": 3.92, "learning_rate": 2.1904544936780956e-06, "loss": 0.2611, "step": 13890 }, { "epoch": 3.92, "learning_rate": 2.1847590841781527e-06, "loss": 0.3371, "step": 13900 }, { "epoch": 3.92, "learning_rate": 2.1790636746782097e-06, "loss": 0.4576, "step": 13910 }, { "epoch": 3.92, "learning_rate": 2.1733682651782663e-06, "loss": 0.4421, "step": 13920 }, { "epoch": 3.93, "learning_rate": 2.1676728556783234e-06, "loss": 0.5129, "step": 13930 }, { "epoch": 3.93, "learning_rate": 2.1619774461783804e-06, "loss": 0.2611, "step": 13940 }, { "epoch": 3.93, "learning_rate": 2.1562820366784375e-06, "loss": 0.4886, "step": 13950 }, { "epoch": 3.94, "learning_rate": 2.150586627178494e-06, "loss": 0.1713, "step": 13960 }, { "epoch": 3.94, "learning_rate": 2.1448912176785515e-06, "loss": 0.1568, "step": 13970 }, { "epoch": 3.94, "learning_rate": 2.139195808178608e-06, "loss": 0.4705, "step": 13980 }, { "epoch": 3.94, "learning_rate": 2.133500398678665e-06, "loss": 0.2815, "step": 13990 }, { "epoch": 3.95, "learning_rate": 2.1278049891787222e-06, "loss": 0.2475, "step": 14000 }, { "epoch": 3.95, "eval_loss": 0.34873369336128235, "eval_runtime": 212.3511, "eval_samples_per_second": 9.517, "eval_steps_per_second": 2.383, "step": 14000 }, { "epoch": 3.95, "learning_rate": 2.1221095796787793e-06, "loss": 0.4751, "step": 14010 }, { "epoch": 3.95, "learning_rate": 2.116414170178836e-06, "loss": 0.4, "step": 14020 }, { "epoch": 3.96, "learning_rate": 2.110718760678893e-06, "loss": 0.4484, "step": 14030 }, { "epoch": 3.96, "learning_rate": 2.10502335117895e-06, "loss": 0.2746, "step": 14040 }, { "epoch": 3.96, "learning_rate": 2.099327941679007e-06, "loss": 0.4018, "step": 14050 }, { "epoch": 3.96, "learning_rate": 2.0936325321790636e-06, "loss": 0.2564, "step": 14060 }, { "epoch": 3.97, "learning_rate": 2.087937122679121e-06, "loss": 0.5503, "step": 14070 }, { "epoch": 3.97, "learning_rate": 2.0822417131791777e-06, "loss": 0.147, "step": 14080 }, { "epoch": 3.97, "learning_rate": 2.0765463036792347e-06, "loss": 0.1796, "step": 14090 }, { "epoch": 3.98, "learning_rate": 2.0708508941792914e-06, "loss": 0.5249, "step": 14100 }, { "epoch": 3.98, "learning_rate": 2.065155484679349e-06, "loss": 0.4368, "step": 14110 }, { "epoch": 3.98, "learning_rate": 2.0594600751794054e-06, "loss": 0.325, "step": 14120 }, { "epoch": 3.98, "learning_rate": 2.0537646656794625e-06, "loss": 0.2107, "step": 14130 }, { "epoch": 3.99, "learning_rate": 2.0480692561795195e-06, "loss": 0.5274, "step": 14140 }, { "epoch": 3.99, "learning_rate": 2.0423738466795766e-06, "loss": 0.3492, "step": 14150 }, { "epoch": 3.99, "learning_rate": 2.036678437179633e-06, "loss": 0.201, "step": 14160 }, { "epoch": 3.99, "learning_rate": 2.0309830276796906e-06, "loss": 0.5432, "step": 14170 }, { "epoch": 4.0, "learning_rate": 2.0252876181797473e-06, "loss": 0.2668, "step": 14180 }, { "epoch": 4.0, "learning_rate": 2.0195922086798043e-06, "loss": 0.3592, "step": 14190 }, { "epoch": 4.0, "learning_rate": 2.013896799179861e-06, "loss": 0.2449, "step": 14200 }, { "epoch": 4.01, "learning_rate": 2.0082013896799184e-06, "loss": 0.2965, "step": 14210 }, { "epoch": 4.01, "learning_rate": 2.002505980179975e-06, "loss": 0.5565, "step": 14220 }, { "epoch": 4.01, "learning_rate": 1.996810570680032e-06, "loss": 0.3033, "step": 14230 }, { "epoch": 4.01, "learning_rate": 1.991115161180089e-06, "loss": 0.3251, "step": 14240 }, { "epoch": 4.02, "learning_rate": 1.985419751680146e-06, "loss": 0.3458, "step": 14250 }, { "epoch": 4.02, "learning_rate": 1.9797243421802027e-06, "loss": 0.1492, "step": 14260 }, { "epoch": 4.02, "learning_rate": 1.9740289326802598e-06, "loss": 0.2058, "step": 14270 }, { "epoch": 4.03, "learning_rate": 1.968333523180317e-06, "loss": 0.3785, "step": 14280 }, { "epoch": 4.03, "learning_rate": 1.962638113680374e-06, "loss": 0.3519, "step": 14290 }, { "epoch": 4.03, "learning_rate": 1.9569427041804305e-06, "loss": 0.4122, "step": 14300 }, { "epoch": 4.03, "learning_rate": 1.951247294680488e-06, "loss": 0.1842, "step": 14310 }, { "epoch": 4.04, "learning_rate": 1.9455518851805446e-06, "loss": 0.3497, "step": 14320 }, { "epoch": 4.04, "learning_rate": 1.9398564756806016e-06, "loss": 0.258, "step": 14330 }, { "epoch": 4.04, "learning_rate": 1.9341610661806586e-06, "loss": 0.2991, "step": 14340 }, { "epoch": 4.05, "learning_rate": 1.9284656566807157e-06, "loss": 0.3041, "step": 14350 }, { "epoch": 4.05, "learning_rate": 1.9227702471807723e-06, "loss": 0.3762, "step": 14360 }, { "epoch": 4.05, "learning_rate": 1.9170748376808293e-06, "loss": 0.2267, "step": 14370 }, { "epoch": 4.05, "learning_rate": 1.9113794281808864e-06, "loss": 0.3088, "step": 14380 }, { "epoch": 4.06, "learning_rate": 1.9056840186809434e-06, "loss": 0.163, "step": 14390 }, { "epoch": 4.06, "learning_rate": 1.8999886091810002e-06, "loss": 0.134, "step": 14400 }, { "epoch": 4.06, "learning_rate": 1.8942931996810573e-06, "loss": 0.3025, "step": 14410 }, { "epoch": 4.07, "learning_rate": 1.8885977901811143e-06, "loss": 0.3232, "step": 14420 }, { "epoch": 4.07, "learning_rate": 1.8829023806811711e-06, "loss": 0.4234, "step": 14430 }, { "epoch": 4.07, "learning_rate": 1.877206971181228e-06, "loss": 0.2133, "step": 14440 }, { "epoch": 4.07, "learning_rate": 1.871511561681285e-06, "loss": 0.5324, "step": 14450 }, { "epoch": 4.08, "learning_rate": 1.865816152181342e-06, "loss": 0.6641, "step": 14460 }, { "epoch": 4.08, "learning_rate": 1.8601207426813989e-06, "loss": 0.2977, "step": 14470 }, { "epoch": 4.08, "learning_rate": 1.854425333181456e-06, "loss": 0.3066, "step": 14480 }, { "epoch": 4.09, "learning_rate": 1.848729923681513e-06, "loss": 0.2809, "step": 14490 }, { "epoch": 4.09, "learning_rate": 1.8430345141815698e-06, "loss": 0.2115, "step": 14500 }, { "epoch": 4.09, "eval_loss": 0.35345104336738586, "eval_runtime": 211.5013, "eval_samples_per_second": 9.555, "eval_steps_per_second": 2.392, "step": 14500 }, { "epoch": 4.09, "learning_rate": 1.8373391046816266e-06, "loss": 0.4939, "step": 14510 }, { "epoch": 4.09, "learning_rate": 1.8316436951816839e-06, "loss": 0.2237, "step": 14520 }, { "epoch": 4.1, "learning_rate": 1.8259482856817407e-06, "loss": 0.0826, "step": 14530 }, { "epoch": 4.1, "learning_rate": 1.8202528761817975e-06, "loss": 0.2169, "step": 14540 }, { "epoch": 4.1, "learning_rate": 1.8145574666818546e-06, "loss": 0.6057, "step": 14550 }, { "epoch": 4.1, "learning_rate": 1.8088620571819116e-06, "loss": 0.2384, "step": 14560 }, { "epoch": 4.11, "learning_rate": 1.8031666476819684e-06, "loss": 0.2647, "step": 14570 }, { "epoch": 4.11, "learning_rate": 1.7974712381820255e-06, "loss": 0.2658, "step": 14580 }, { "epoch": 4.11, "learning_rate": 1.7917758286820825e-06, "loss": 0.2966, "step": 14590 }, { "epoch": 4.12, "learning_rate": 1.7860804191821393e-06, "loss": 0.3193, "step": 14600 }, { "epoch": 4.12, "learning_rate": 1.7803850096821962e-06, "loss": 0.3529, "step": 14610 }, { "epoch": 4.12, "learning_rate": 1.7746896001822534e-06, "loss": 0.3283, "step": 14620 }, { "epoch": 4.12, "learning_rate": 1.7689941906823103e-06, "loss": 0.2698, "step": 14630 }, { "epoch": 4.13, "learning_rate": 1.763298781182367e-06, "loss": 0.3715, "step": 14640 }, { "epoch": 4.13, "learning_rate": 1.7576033716824241e-06, "loss": 0.3999, "step": 14650 }, { "epoch": 4.13, "learning_rate": 1.7519079621824812e-06, "loss": 0.3378, "step": 14660 }, { "epoch": 4.14, "learning_rate": 1.746212552682538e-06, "loss": 0.215, "step": 14670 }, { "epoch": 4.14, "learning_rate": 1.7405171431825948e-06, "loss": 0.3081, "step": 14680 }, { "epoch": 4.14, "learning_rate": 1.734821733682652e-06, "loss": 0.2634, "step": 14690 }, { "epoch": 4.14, "learning_rate": 1.729126324182709e-06, "loss": 0.4724, "step": 14700 }, { "epoch": 4.15, "learning_rate": 1.7234309146827657e-06, "loss": 0.294, "step": 14710 }, { "epoch": 4.15, "learning_rate": 1.717735505182823e-06, "loss": 0.2399, "step": 14720 }, { "epoch": 4.15, "learning_rate": 1.7120400956828798e-06, "loss": 0.2102, "step": 14730 }, { "epoch": 4.16, "learning_rate": 1.7063446861829366e-06, "loss": 0.1503, "step": 14740 }, { "epoch": 4.16, "learning_rate": 1.7006492766829935e-06, "loss": 0.1694, "step": 14750 }, { "epoch": 4.16, "learning_rate": 1.6949538671830507e-06, "loss": 0.4448, "step": 14760 }, { "epoch": 4.16, "learning_rate": 1.6892584576831075e-06, "loss": 0.1987, "step": 14770 }, { "epoch": 4.17, "learning_rate": 1.6835630481831644e-06, "loss": 0.3097, "step": 14780 }, { "epoch": 4.17, "learning_rate": 1.6778676386832216e-06, "loss": 0.2567, "step": 14790 }, { "epoch": 4.17, "learning_rate": 1.6721722291832785e-06, "loss": 0.2016, "step": 14800 }, { "epoch": 4.18, "learning_rate": 1.6664768196833353e-06, "loss": 0.173, "step": 14810 }, { "epoch": 4.18, "learning_rate": 1.6607814101833925e-06, "loss": 0.2601, "step": 14820 }, { "epoch": 4.18, "learning_rate": 1.6550860006834494e-06, "loss": 0.2977, "step": 14830 }, { "epoch": 4.18, "learning_rate": 1.6493905911835062e-06, "loss": 0.2837, "step": 14840 }, { "epoch": 4.19, "learning_rate": 1.643695181683563e-06, "loss": 0.3155, "step": 14850 }, { "epoch": 4.19, "learning_rate": 1.6379997721836203e-06, "loss": 0.2774, "step": 14860 }, { "epoch": 4.19, "learning_rate": 1.632304362683677e-06, "loss": 0.0978, "step": 14870 }, { "epoch": 4.2, "learning_rate": 1.626608953183734e-06, "loss": 0.4131, "step": 14880 }, { "epoch": 4.2, "learning_rate": 1.6209135436837912e-06, "loss": 0.3686, "step": 14890 }, { "epoch": 4.2, "learning_rate": 1.615218134183848e-06, "loss": 0.0879, "step": 14900 }, { "epoch": 4.2, "learning_rate": 1.6095227246839048e-06, "loss": 0.401, "step": 14910 }, { "epoch": 4.21, "learning_rate": 1.6038273151839617e-06, "loss": 0.2656, "step": 14920 }, { "epoch": 4.21, "learning_rate": 1.598131905684019e-06, "loss": 0.3608, "step": 14930 }, { "epoch": 4.21, "learning_rate": 1.5924364961840757e-06, "loss": 0.3265, "step": 14940 }, { "epoch": 4.21, "learning_rate": 1.5867410866841326e-06, "loss": 0.372, "step": 14950 }, { "epoch": 4.22, "learning_rate": 1.5810456771841898e-06, "loss": 0.302, "step": 14960 }, { "epoch": 4.22, "learning_rate": 1.5753502676842467e-06, "loss": 0.2329, "step": 14970 }, { "epoch": 4.22, "learning_rate": 1.5696548581843035e-06, "loss": 0.1317, "step": 14980 }, { "epoch": 4.23, "learning_rate": 1.5639594486843607e-06, "loss": 0.5018, "step": 14990 }, { "epoch": 4.23, "learning_rate": 1.5582640391844176e-06, "loss": 0.4088, "step": 15000 }, { "epoch": 4.23, "eval_loss": 0.3560781478881836, "eval_runtime": 211.6609, "eval_samples_per_second": 9.548, "eval_steps_per_second": 2.391, "step": 15000 }, { "epoch": 4.23, "learning_rate": 1.5525686296844744e-06, "loss": 0.3594, "step": 15010 }, { "epoch": 4.23, "learning_rate": 1.5468732201845312e-06, "loss": 0.1288, "step": 15020 }, { "epoch": 4.24, "learning_rate": 1.5411778106845885e-06, "loss": 0.1063, "step": 15030 }, { "epoch": 4.24, "learning_rate": 1.5354824011846453e-06, "loss": 0.3444, "step": 15040 }, { "epoch": 4.24, "learning_rate": 1.5297869916847021e-06, "loss": 0.393, "step": 15050 }, { "epoch": 4.25, "learning_rate": 1.5240915821847594e-06, "loss": 0.3674, "step": 15060 }, { "epoch": 4.25, "learning_rate": 1.5183961726848162e-06, "loss": 0.2209, "step": 15070 }, { "epoch": 4.25, "learning_rate": 1.512700763184873e-06, "loss": 0.2758, "step": 15080 }, { "epoch": 4.25, "learning_rate": 1.5070053536849299e-06, "loss": 0.3258, "step": 15090 }, { "epoch": 4.26, "learning_rate": 1.5013099441849871e-06, "loss": 0.5878, "step": 15100 }, { "epoch": 4.26, "learning_rate": 1.495614534685044e-06, "loss": 0.2565, "step": 15110 }, { "epoch": 4.26, "learning_rate": 1.4899191251851008e-06, "loss": 0.1671, "step": 15120 }, { "epoch": 4.27, "learning_rate": 1.484223715685158e-06, "loss": 0.3276, "step": 15130 }, { "epoch": 4.27, "learning_rate": 1.4785283061852149e-06, "loss": 0.5138, "step": 15140 }, { "epoch": 4.27, "learning_rate": 1.4728328966852717e-06, "loss": 0.4189, "step": 15150 }, { "epoch": 4.27, "learning_rate": 1.4671374871853285e-06, "loss": 0.1441, "step": 15160 }, { "epoch": 4.28, "learning_rate": 1.4614420776853858e-06, "loss": 0.4461, "step": 15170 }, { "epoch": 4.28, "learning_rate": 1.4557466681854426e-06, "loss": 0.2924, "step": 15180 }, { "epoch": 4.28, "learning_rate": 1.4500512586854994e-06, "loss": 0.2742, "step": 15190 }, { "epoch": 4.29, "learning_rate": 1.4443558491855567e-06, "loss": 0.5659, "step": 15200 }, { "epoch": 4.29, "learning_rate": 1.4386604396856135e-06, "loss": 0.3907, "step": 15210 }, { "epoch": 4.29, "learning_rate": 1.4329650301856703e-06, "loss": 0.3276, "step": 15220 }, { "epoch": 4.29, "learning_rate": 1.4272696206857276e-06, "loss": 0.1779, "step": 15230 }, { "epoch": 4.3, "learning_rate": 1.4215742111857844e-06, "loss": 0.2407, "step": 15240 }, { "epoch": 4.3, "learning_rate": 1.4158788016858412e-06, "loss": 0.5414, "step": 15250 }, { "epoch": 4.3, "learning_rate": 1.410183392185898e-06, "loss": 0.213, "step": 15260 }, { "epoch": 4.31, "learning_rate": 1.4044879826859553e-06, "loss": 0.3669, "step": 15270 }, { "epoch": 4.31, "learning_rate": 1.3987925731860122e-06, "loss": 0.4115, "step": 15280 }, { "epoch": 4.31, "learning_rate": 1.393097163686069e-06, "loss": 0.352, "step": 15290 }, { "epoch": 4.31, "learning_rate": 1.3874017541861262e-06, "loss": 0.5727, "step": 15300 }, { "epoch": 4.32, "learning_rate": 1.381706344686183e-06, "loss": 0.3396, "step": 15310 }, { "epoch": 4.32, "learning_rate": 1.3760109351862399e-06, "loss": 0.2974, "step": 15320 }, { "epoch": 4.32, "learning_rate": 1.370315525686297e-06, "loss": 0.3642, "step": 15330 }, { "epoch": 4.32, "learning_rate": 1.364620116186354e-06, "loss": 0.2653, "step": 15340 }, { "epoch": 4.33, "learning_rate": 1.3589247066864108e-06, "loss": 0.3407, "step": 15350 }, { "epoch": 4.33, "learning_rate": 1.3532292971864676e-06, "loss": 0.3, "step": 15360 }, { "epoch": 4.33, "learning_rate": 1.3475338876865249e-06, "loss": 0.2095, "step": 15370 }, { "epoch": 4.34, "learning_rate": 1.3418384781865817e-06, "loss": 0.2906, "step": 15380 }, { "epoch": 4.34, "learning_rate": 1.3361430686866385e-06, "loss": 0.263, "step": 15390 }, { "epoch": 4.34, "learning_rate": 1.3304476591866956e-06, "loss": 0.2987, "step": 15400 }, { "epoch": 4.34, "learning_rate": 1.3247522496867526e-06, "loss": 0.3188, "step": 15410 }, { "epoch": 4.35, "learning_rate": 1.3190568401868094e-06, "loss": 0.6636, "step": 15420 }, { "epoch": 4.35, "learning_rate": 1.3133614306868665e-06, "loss": 0.4354, "step": 15430 }, { "epoch": 4.35, "learning_rate": 1.3076660211869235e-06, "loss": 0.2682, "step": 15440 }, { "epoch": 4.36, "learning_rate": 1.3019706116869804e-06, "loss": 0.4007, "step": 15450 }, { "epoch": 4.36, "learning_rate": 1.2962752021870372e-06, "loss": 0.4228, "step": 15460 }, { "epoch": 4.36, "learning_rate": 1.2905797926870944e-06, "loss": 0.2764, "step": 15470 }, { "epoch": 4.36, "learning_rate": 1.2848843831871513e-06, "loss": 0.1821, "step": 15480 }, { "epoch": 4.37, "learning_rate": 1.279188973687208e-06, "loss": 0.1724, "step": 15490 }, { "epoch": 4.37, "learning_rate": 1.2734935641872651e-06, "loss": 0.502, "step": 15500 }, { "epoch": 4.37, "eval_loss": 0.3557915985584259, "eval_runtime": 212.001, "eval_samples_per_second": 9.533, "eval_steps_per_second": 2.387, "step": 15500 }, { "epoch": 4.37, "learning_rate": 1.2677981546873222e-06, "loss": 0.4079, "step": 15510 }, { "epoch": 4.38, "learning_rate": 1.262102745187379e-06, "loss": 0.3792, "step": 15520 }, { "epoch": 4.38, "learning_rate": 1.256407335687436e-06, "loss": 0.2857, "step": 15530 }, { "epoch": 4.38, "learning_rate": 1.250711926187493e-06, "loss": 0.2627, "step": 15540 }, { "epoch": 4.38, "learning_rate": 1.24501651668755e-06, "loss": 0.3787, "step": 15550 }, { "epoch": 4.39, "learning_rate": 1.239321107187607e-06, "loss": 0.4017, "step": 15560 }, { "epoch": 4.39, "learning_rate": 1.2336256976876638e-06, "loss": 0.3776, "step": 15570 }, { "epoch": 4.39, "learning_rate": 1.2279302881877208e-06, "loss": 0.3595, "step": 15580 }, { "epoch": 4.4, "learning_rate": 1.2222348786877776e-06, "loss": 0.2288, "step": 15590 }, { "epoch": 4.4, "learning_rate": 1.2165394691878347e-06, "loss": 0.3921, "step": 15600 }, { "epoch": 4.4, "learning_rate": 1.2108440596878917e-06, "loss": 0.3526, "step": 15610 }, { "epoch": 4.4, "learning_rate": 1.2051486501879486e-06, "loss": 0.2754, "step": 15620 }, { "epoch": 4.41, "learning_rate": 1.1994532406880056e-06, "loss": 0.347, "step": 15630 }, { "epoch": 4.41, "learning_rate": 1.1937578311880624e-06, "loss": 0.2734, "step": 15640 }, { "epoch": 4.41, "learning_rate": 1.1880624216881195e-06, "loss": 0.3327, "step": 15650 }, { "epoch": 4.41, "learning_rate": 1.1823670121881765e-06, "loss": 0.3287, "step": 15660 }, { "epoch": 4.42, "learning_rate": 1.1766716026882333e-06, "loss": 0.3132, "step": 15670 }, { "epoch": 4.42, "learning_rate": 1.1709761931882904e-06, "loss": 0.4023, "step": 15680 }, { "epoch": 4.42, "learning_rate": 1.1652807836883472e-06, "loss": 0.3814, "step": 15690 }, { "epoch": 4.43, "learning_rate": 1.1595853741884042e-06, "loss": 0.4485, "step": 15700 }, { "epoch": 4.43, "learning_rate": 1.1538899646884613e-06, "loss": 0.286, "step": 15710 }, { "epoch": 4.43, "learning_rate": 1.1481945551885181e-06, "loss": 0.6182, "step": 15720 }, { "epoch": 4.43, "learning_rate": 1.1424991456885751e-06, "loss": 0.4019, "step": 15730 }, { "epoch": 4.44, "learning_rate": 1.136803736188632e-06, "loss": 0.3184, "step": 15740 }, { "epoch": 4.44, "learning_rate": 1.131108326688689e-06, "loss": 0.4947, "step": 15750 }, { "epoch": 4.44, "learning_rate": 1.125412917188746e-06, "loss": 0.4203, "step": 15760 }, { "epoch": 4.45, "learning_rate": 1.1197175076888029e-06, "loss": 0.1995, "step": 15770 }, { "epoch": 4.45, "learning_rate": 1.11402209818886e-06, "loss": 0.2659, "step": 15780 }, { "epoch": 4.45, "learning_rate": 1.1083266886889168e-06, "loss": 0.3046, "step": 15790 }, { "epoch": 4.45, "learning_rate": 1.1026312791889738e-06, "loss": 0.1809, "step": 15800 }, { "epoch": 4.46, "learning_rate": 1.0969358696890308e-06, "loss": 0.2639, "step": 15810 }, { "epoch": 4.46, "learning_rate": 1.0912404601890877e-06, "loss": 0.272, "step": 15820 }, { "epoch": 4.46, "learning_rate": 1.0855450506891447e-06, "loss": 0.6332, "step": 15830 }, { "epoch": 4.47, "learning_rate": 1.0798496411892015e-06, "loss": 0.3555, "step": 15840 }, { "epoch": 4.47, "learning_rate": 1.0741542316892586e-06, "loss": 0.271, "step": 15850 }, { "epoch": 4.47, "learning_rate": 1.0684588221893156e-06, "loss": 0.1772, "step": 15860 }, { "epoch": 4.47, "learning_rate": 1.0627634126893724e-06, "loss": 0.4972, "step": 15870 }, { "epoch": 4.48, "learning_rate": 1.0570680031894295e-06, "loss": 0.3808, "step": 15880 }, { "epoch": 4.48, "learning_rate": 1.0513725936894863e-06, "loss": 0.2823, "step": 15890 }, { "epoch": 4.48, "learning_rate": 1.0456771841895433e-06, "loss": 0.3016, "step": 15900 }, { "epoch": 4.49, "learning_rate": 1.0399817746896004e-06, "loss": 0.4662, "step": 15910 }, { "epoch": 4.49, "learning_rate": 1.0342863651896572e-06, "loss": 0.3594, "step": 15920 }, { "epoch": 4.49, "learning_rate": 1.0285909556897143e-06, "loss": 0.2838, "step": 15930 }, { "epoch": 4.49, "learning_rate": 1.022895546189771e-06, "loss": 0.3706, "step": 15940 }, { "epoch": 4.5, "learning_rate": 1.0172001366898281e-06, "loss": 0.4332, "step": 15950 }, { "epoch": 4.5, "learning_rate": 1.0115047271898852e-06, "loss": 0.2483, "step": 15960 }, { "epoch": 4.5, "learning_rate": 1.005809317689942e-06, "loss": 0.2208, "step": 15970 }, { "epoch": 4.51, "learning_rate": 1.000113908189999e-06, "loss": 0.3334, "step": 15980 }, { "epoch": 4.51, "learning_rate": 9.944184986900559e-07, "loss": 0.298, "step": 15990 }, { "epoch": 4.51, "learning_rate": 9.88723089190113e-07, "loss": 0.3099, "step": 16000 }, { "epoch": 4.51, "eval_loss": 0.3557519316673279, "eval_runtime": 212.3782, "eval_samples_per_second": 9.516, "eval_steps_per_second": 2.383, "step": 16000 }, { "epoch": 4.51, "learning_rate": 9.8302767969017e-07, "loss": 0.4163, "step": 16010 }, { "epoch": 4.52, "learning_rate": 9.773322701902268e-07, "loss": 0.1947, "step": 16020 }, { "epoch": 4.52, "learning_rate": 9.716368606902838e-07, "loss": 0.3316, "step": 16030 }, { "epoch": 4.52, "learning_rate": 9.659414511903406e-07, "loss": 0.1959, "step": 16040 }, { "epoch": 4.52, "learning_rate": 9.602460416903977e-07, "loss": 0.2092, "step": 16050 }, { "epoch": 4.53, "learning_rate": 9.545506321904547e-07, "loss": 0.1516, "step": 16060 }, { "epoch": 4.53, "learning_rate": 9.488552226905115e-07, "loss": 0.5363, "step": 16070 }, { "epoch": 4.53, "learning_rate": 9.431598131905685e-07, "loss": 0.3025, "step": 16080 }, { "epoch": 4.54, "learning_rate": 9.374644036906254e-07, "loss": 0.4779, "step": 16090 }, { "epoch": 4.54, "learning_rate": 9.317689941906824e-07, "loss": 0.277, "step": 16100 }, { "epoch": 4.54, "learning_rate": 9.260735846907394e-07, "loss": 0.1441, "step": 16110 }, { "epoch": 4.54, "learning_rate": 9.203781751907963e-07, "loss": 0.2157, "step": 16120 }, { "epoch": 4.55, "learning_rate": 9.146827656908533e-07, "loss": 0.2452, "step": 16130 }, { "epoch": 4.55, "learning_rate": 9.089873561909102e-07, "loss": 0.4405, "step": 16140 }, { "epoch": 4.55, "learning_rate": 9.032919466909671e-07, "loss": 0.4546, "step": 16150 }, { "epoch": 4.56, "learning_rate": 8.975965371910241e-07, "loss": 0.4222, "step": 16160 }, { "epoch": 4.56, "learning_rate": 8.919011276910811e-07, "loss": 0.4103, "step": 16170 }, { "epoch": 4.56, "learning_rate": 8.86205718191138e-07, "loss": 0.522, "step": 16180 }, { "epoch": 4.56, "learning_rate": 8.80510308691195e-07, "loss": 0.4269, "step": 16190 }, { "epoch": 4.57, "learning_rate": 8.748148991912519e-07, "loss": 0.3538, "step": 16200 }, { "epoch": 4.57, "learning_rate": 8.691194896913088e-07, "loss": 0.3338, "step": 16210 }, { "epoch": 4.57, "learning_rate": 8.634240801913659e-07, "loss": 0.2906, "step": 16220 }, { "epoch": 4.58, "learning_rate": 8.577286706914228e-07, "loss": 0.3005, "step": 16230 }, { "epoch": 4.58, "learning_rate": 8.520332611914797e-07, "loss": 0.5027, "step": 16240 }, { "epoch": 4.58, "learning_rate": 8.463378516915367e-07, "loss": 0.255, "step": 16250 }, { "epoch": 4.58, "learning_rate": 8.406424421915936e-07, "loss": 0.1749, "step": 16260 }, { "epoch": 4.59, "learning_rate": 8.349470326916507e-07, "loss": 0.2787, "step": 16270 }, { "epoch": 4.59, "learning_rate": 8.292516231917075e-07, "loss": 0.2366, "step": 16280 }, { "epoch": 4.59, "learning_rate": 8.235562136917645e-07, "loss": 0.2687, "step": 16290 }, { "epoch": 4.6, "learning_rate": 8.178608041918215e-07, "loss": 0.4187, "step": 16300 }, { "epoch": 4.6, "learning_rate": 8.121653946918784e-07, "loss": 0.1763, "step": 16310 }, { "epoch": 4.6, "learning_rate": 8.064699851919354e-07, "loss": 0.377, "step": 16320 }, { "epoch": 4.6, "learning_rate": 8.007745756919923e-07, "loss": 0.5291, "step": 16330 }, { "epoch": 4.61, "learning_rate": 7.950791661920493e-07, "loss": 0.3513, "step": 16340 }, { "epoch": 4.61, "learning_rate": 7.893837566921062e-07, "loss": 0.2492, "step": 16350 }, { "epoch": 4.61, "learning_rate": 7.836883471921632e-07, "loss": 0.4625, "step": 16360 }, { "epoch": 4.62, "learning_rate": 7.779929376922202e-07, "loss": 0.2957, "step": 16370 }, { "epoch": 4.62, "learning_rate": 7.72297528192277e-07, "loss": 0.3181, "step": 16380 }, { "epoch": 4.62, "learning_rate": 7.666021186923341e-07, "loss": 0.1754, "step": 16390 }, { "epoch": 4.62, "learning_rate": 7.609067091923909e-07, "loss": 0.4674, "step": 16400 }, { "epoch": 4.63, "learning_rate": 7.55211299692448e-07, "loss": 0.3187, "step": 16410 }, { "epoch": 4.63, "learning_rate": 7.49515890192505e-07, "loss": 0.5942, "step": 16420 }, { "epoch": 4.63, "learning_rate": 7.438204806925618e-07, "loss": 0.3195, "step": 16430 }, { "epoch": 4.63, "learning_rate": 7.381250711926189e-07, "loss": 0.5229, "step": 16440 }, { "epoch": 4.64, "learning_rate": 7.324296616926757e-07, "loss": 0.4654, "step": 16450 }, { "epoch": 4.64, "learning_rate": 7.267342521927327e-07, "loss": 0.4792, "step": 16460 }, { "epoch": 4.64, "learning_rate": 7.210388426927898e-07, "loss": 0.2923, "step": 16470 }, { "epoch": 4.65, "learning_rate": 7.153434331928466e-07, "loss": 0.2751, "step": 16480 }, { "epoch": 4.65, "learning_rate": 7.096480236929036e-07, "loss": 0.1897, "step": 16490 }, { "epoch": 4.65, "learning_rate": 7.039526141929605e-07, "loss": 0.2381, "step": 16500 }, { "epoch": 4.65, "eval_loss": 0.3569630980491638, "eval_runtime": 212.316, "eval_samples_per_second": 9.519, "eval_steps_per_second": 2.383, "step": 16500 }, { "epoch": 4.65, "learning_rate": 6.982572046930175e-07, "loss": 0.4211, "step": 16510 }, { "epoch": 4.66, "learning_rate": 6.925617951930743e-07, "loss": 0.2632, "step": 16520 }, { "epoch": 4.66, "learning_rate": 6.868663856931314e-07, "loss": 0.3168, "step": 16530 }, { "epoch": 4.66, "learning_rate": 6.811709761931884e-07, "loss": 0.1798, "step": 16540 }, { "epoch": 4.67, "learning_rate": 6.754755666932452e-07, "loss": 0.2793, "step": 16550 }, { "epoch": 4.67, "learning_rate": 6.697801571933023e-07, "loss": 0.2896, "step": 16560 }, { "epoch": 4.67, "learning_rate": 6.640847476933591e-07, "loss": 0.4018, "step": 16570 }, { "epoch": 4.67, "learning_rate": 6.583893381934162e-07, "loss": 0.4488, "step": 16580 }, { "epoch": 4.68, "learning_rate": 6.526939286934732e-07, "loss": 0.1678, "step": 16590 }, { "epoch": 4.68, "learning_rate": 6.4699851919353e-07, "loss": 0.2788, "step": 16600 }, { "epoch": 4.68, "learning_rate": 6.413031096935871e-07, "loss": 0.2512, "step": 16610 }, { "epoch": 4.69, "learning_rate": 6.356077001936439e-07, "loss": 0.297, "step": 16620 }, { "epoch": 4.69, "learning_rate": 6.299122906937009e-07, "loss": 0.1882, "step": 16630 }, { "epoch": 4.69, "learning_rate": 6.242168811937579e-07, "loss": 0.4612, "step": 16640 }, { "epoch": 4.69, "learning_rate": 6.185214716938148e-07, "loss": 0.3111, "step": 16650 }, { "epoch": 4.7, "learning_rate": 6.128260621938717e-07, "loss": 0.3121, "step": 16660 }, { "epoch": 4.7, "learning_rate": 6.071306526939288e-07, "loss": 0.2923, "step": 16670 }, { "epoch": 4.7, "learning_rate": 6.014352431939857e-07, "loss": 0.3701, "step": 16680 }, { "epoch": 4.71, "learning_rate": 5.957398336940426e-07, "loss": 0.2431, "step": 16690 }, { "epoch": 4.71, "learning_rate": 5.900444241940996e-07, "loss": 0.3444, "step": 16700 }, { "epoch": 4.71, "learning_rate": 5.843490146941565e-07, "loss": 0.2769, "step": 16710 }, { "epoch": 4.71, "learning_rate": 5.786536051942134e-07, "loss": 0.1501, "step": 16720 }, { "epoch": 4.72, "learning_rate": 5.729581956942705e-07, "loss": 0.3458, "step": 16730 }, { "epoch": 4.72, "learning_rate": 5.672627861943274e-07, "loss": 0.3527, "step": 16740 }, { "epoch": 4.72, "learning_rate": 5.615673766943844e-07, "loss": 0.3205, "step": 16750 }, { "epoch": 4.73, "learning_rate": 5.558719671944413e-07, "loss": 0.2322, "step": 16760 }, { "epoch": 4.73, "learning_rate": 5.501765576944982e-07, "loss": 0.583, "step": 16770 }, { "epoch": 4.73, "learning_rate": 5.444811481945552e-07, "loss": 0.3078, "step": 16780 }, { "epoch": 4.73, "learning_rate": 5.387857386946122e-07, "loss": 0.2509, "step": 16790 }, { "epoch": 4.74, "learning_rate": 5.330903291946691e-07, "loss": 0.2672, "step": 16800 }, { "epoch": 4.74, "learning_rate": 5.273949196947261e-07, "loss": 0.4086, "step": 16810 }, { "epoch": 4.74, "learning_rate": 5.21699510194783e-07, "loss": 0.4899, "step": 16820 }, { "epoch": 4.74, "learning_rate": 5.1600410069484e-07, "loss": 0.2831, "step": 16830 }, { "epoch": 4.75, "learning_rate": 5.10308691194897e-07, "loss": 0.3935, "step": 16840 }, { "epoch": 4.75, "learning_rate": 5.046132816949539e-07, "loss": 0.3414, "step": 16850 }, { "epoch": 4.75, "learning_rate": 4.989178721950108e-07, "loss": 0.2679, "step": 16860 }, { "epoch": 4.76, "learning_rate": 4.932224626950678e-07, "loss": 0.313, "step": 16870 }, { "epoch": 4.76, "learning_rate": 4.875270531951248e-07, "loss": 0.3272, "step": 16880 }, { "epoch": 4.76, "learning_rate": 4.818316436951817e-07, "loss": 0.321, "step": 16890 }, { "epoch": 4.76, "learning_rate": 4.7613623419523863e-07, "loss": 0.4763, "step": 16900 }, { "epoch": 4.77, "learning_rate": 4.7044082469529567e-07, "loss": 0.2577, "step": 16910 }, { "epoch": 4.77, "learning_rate": 4.647454151953526e-07, "loss": 0.2298, "step": 16920 }, { "epoch": 4.77, "learning_rate": 4.5905000569540954e-07, "loss": 0.3451, "step": 16930 }, { "epoch": 4.78, "learning_rate": 4.533545961954665e-07, "loss": 0.2389, "step": 16940 }, { "epoch": 4.78, "learning_rate": 4.476591866955234e-07, "loss": 0.3936, "step": 16950 }, { "epoch": 4.78, "learning_rate": 4.4196377719558034e-07, "loss": 0.3509, "step": 16960 }, { "epoch": 4.78, "learning_rate": 4.362683676956374e-07, "loss": 0.2643, "step": 16970 }, { "epoch": 4.79, "learning_rate": 4.305729581956943e-07, "loss": 0.541, "step": 16980 }, { "epoch": 4.79, "learning_rate": 4.2487754869575125e-07, "loss": 0.1942, "step": 16990 }, { "epoch": 4.79, "learning_rate": 4.191821391958082e-07, "loss": 0.3614, "step": 17000 }, { "epoch": 4.79, "eval_loss": 0.3553633391857147, "eval_runtime": 211.9344, "eval_samples_per_second": 9.536, "eval_steps_per_second": 2.388, "step": 17000 }, { "epoch": 4.8, "learning_rate": 4.1348672969586517e-07, "loss": 0.3707, "step": 17010 }, { "epoch": 4.8, "learning_rate": 4.077913201959221e-07, "loss": 0.3282, "step": 17020 }, { "epoch": 4.8, "learning_rate": 4.020959106959791e-07, "loss": 0.51, "step": 17030 }, { "epoch": 4.8, "learning_rate": 3.9640050119603603e-07, "loss": 0.1609, "step": 17040 }, { "epoch": 4.81, "learning_rate": 3.9070509169609296e-07, "loss": 0.3832, "step": 17050 }, { "epoch": 4.81, "learning_rate": 3.8500968219614995e-07, "loss": 0.2903, "step": 17060 }, { "epoch": 4.81, "learning_rate": 3.793142726962069e-07, "loss": 0.2686, "step": 17070 }, { "epoch": 4.82, "learning_rate": 3.736188631962638e-07, "loss": 0.2917, "step": 17080 }, { "epoch": 4.82, "learning_rate": 3.679234536963208e-07, "loss": 0.265, "step": 17090 }, { "epoch": 4.82, "learning_rate": 3.6222804419637774e-07, "loss": 0.3747, "step": 17100 }, { "epoch": 4.82, "learning_rate": 3.5653263469643473e-07, "loss": 0.5041, "step": 17110 }, { "epoch": 4.83, "learning_rate": 3.5083722519649166e-07, "loss": 0.2131, "step": 17120 }, { "epoch": 4.83, "learning_rate": 3.451418156965486e-07, "loss": 0.346, "step": 17130 }, { "epoch": 4.83, "learning_rate": 3.3944640619660553e-07, "loss": 0.1843, "step": 17140 }, { "epoch": 4.84, "learning_rate": 3.337509966966625e-07, "loss": 0.4283, "step": 17150 }, { "epoch": 4.84, "learning_rate": 3.280555871967195e-07, "loss": 0.4339, "step": 17160 }, { "epoch": 4.84, "learning_rate": 3.2236017769677644e-07, "loss": 0.2597, "step": 17170 }, { "epoch": 4.84, "learning_rate": 3.1666476819683337e-07, "loss": 0.3388, "step": 17180 }, { "epoch": 4.85, "learning_rate": 3.1096935869689036e-07, "loss": 0.4598, "step": 17190 }, { "epoch": 4.85, "learning_rate": 3.052739491969473e-07, "loss": 0.3224, "step": 17200 }, { "epoch": 4.85, "learning_rate": 2.9957853969700423e-07, "loss": 0.3763, "step": 17210 }, { "epoch": 4.85, "learning_rate": 2.938831301970612e-07, "loss": 0.3208, "step": 17220 }, { "epoch": 4.86, "learning_rate": 2.8818772069711815e-07, "loss": 0.1648, "step": 17230 }, { "epoch": 4.86, "learning_rate": 2.824923111971751e-07, "loss": 0.2525, "step": 17240 }, { "epoch": 4.86, "learning_rate": 2.7679690169723207e-07, "loss": 0.5624, "step": 17250 }, { "epoch": 4.87, "learning_rate": 2.71101492197289e-07, "loss": 0.4129, "step": 17260 }, { "epoch": 4.87, "learning_rate": 2.6540608269734594e-07, "loss": 0.2936, "step": 17270 }, { "epoch": 4.87, "learning_rate": 2.5971067319740293e-07, "loss": 0.1014, "step": 17280 }, { "epoch": 4.87, "learning_rate": 2.5401526369745986e-07, "loss": 0.5215, "step": 17290 }, { "epoch": 4.88, "learning_rate": 2.483198541975168e-07, "loss": 0.3502, "step": 17300 }, { "epoch": 4.88, "learning_rate": 2.426244446975738e-07, "loss": 0.4451, "step": 17310 }, { "epoch": 4.88, "learning_rate": 2.3692903519763072e-07, "loss": 0.3947, "step": 17320 }, { "epoch": 4.89, "learning_rate": 2.3123362569768768e-07, "loss": 0.3667, "step": 17330 }, { "epoch": 4.89, "learning_rate": 2.2553821619774464e-07, "loss": 0.3662, "step": 17340 }, { "epoch": 4.89, "learning_rate": 2.1984280669780157e-07, "loss": 0.2084, "step": 17350 }, { "epoch": 4.89, "learning_rate": 2.1414739719785853e-07, "loss": 0.5334, "step": 17360 }, { "epoch": 4.9, "learning_rate": 2.084519876979155e-07, "loss": 0.3224, "step": 17370 }, { "epoch": 4.9, "learning_rate": 2.0275657819797246e-07, "loss": 0.2063, "step": 17380 }, { "epoch": 4.9, "learning_rate": 1.970611686980294e-07, "loss": 0.2368, "step": 17390 }, { "epoch": 4.91, "learning_rate": 1.9136575919808638e-07, "loss": 0.3701, "step": 17400 }, { "epoch": 4.91, "learning_rate": 1.856703496981433e-07, "loss": 0.2256, "step": 17410 }, { "epoch": 4.91, "learning_rate": 1.7997494019820025e-07, "loss": 0.2116, "step": 17420 }, { "epoch": 4.91, "learning_rate": 1.7427953069825723e-07, "loss": 0.4188, "step": 17430 }, { "epoch": 4.92, "learning_rate": 1.6858412119831417e-07, "loss": 0.3854, "step": 17440 }, { "epoch": 4.92, "learning_rate": 1.628887116983711e-07, "loss": 0.2101, "step": 17450 }, { "epoch": 4.92, "learning_rate": 1.571933021984281e-07, "loss": 0.3551, "step": 17460 }, { "epoch": 4.93, "learning_rate": 1.5149789269848502e-07, "loss": 0.3388, "step": 17470 }, { "epoch": 4.93, "learning_rate": 1.4580248319854198e-07, "loss": 0.478, "step": 17480 }, { "epoch": 4.93, "learning_rate": 1.4010707369859895e-07, "loss": 0.1387, "step": 17490 }, { "epoch": 4.93, "learning_rate": 1.3441166419865588e-07, "loss": 0.5615, "step": 17500 }, { "epoch": 4.93, "eval_loss": 0.35653403401374817, "eval_runtime": 212.1272, "eval_samples_per_second": 9.527, "eval_steps_per_second": 2.385, "step": 17500 } ], "logging_steps": 10, "max_steps": 17735, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 666056317861888.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }