{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0722692253154986, "eval_steps": 5290, "global_step": 65000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "eval_loss": 1.797255277633667, "eval_runtime": 160.9316, "eval_samples_per_second": 5.773, "eval_steps_per_second": 5.773, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.9999916262476826e-06, "loss": 2.339, "step": 100 }, { "epoch": 0.01, "learning_rate": 2.9999626800634057e-06, "loss": 2.1788, "step": 200 }, { "epoch": 0.01, "learning_rate": 2.9999130584664085e-06, "loss": 2.1946, "step": 300 }, { "epoch": 0.02, "learning_rate": 2.9998427621406735e-06, "loss": 2.2431, "step": 400 }, { "epoch": 0.02, "learning_rate": 2.9997517920551614e-06, "loss": 2.1155, "step": 500 }, { "epoch": 0.03, "learning_rate": 2.9996401494637996e-06, "loss": 2.0998, "step": 600 }, { "epoch": 0.03, "learning_rate": 2.9995078359054642e-06, "loss": 2.0592, "step": 700 }, { "epoch": 0.04, "learning_rate": 2.999354853203959e-06, "loss": 2.0821, "step": 800 }, { "epoch": 0.04, "learning_rate": 2.9991812034679892e-06, "loss": 1.8844, "step": 900 }, { "epoch": 0.05, "learning_rate": 2.9989868890911354e-06, "loss": 2.1784, "step": 1000 }, { "epoch": 0.05, "learning_rate": 2.9987719127518173e-06, "loss": 2.0341, "step": 1100 }, { "epoch": 0.06, "learning_rate": 2.9985362774132576e-06, "loss": 2.1155, "step": 1200 }, { "epoch": 0.06, "learning_rate": 2.9982799863234435e-06, "loss": 2.0074, "step": 1300 }, { "epoch": 0.07, "learning_rate": 2.998003043015078e-06, "loss": 2.0324, "step": 1400 }, { "epoch": 0.07, "learning_rate": 2.9977054513055346e-06, "loss": 1.9387, "step": 1500 }, { "epoch": 0.08, "learning_rate": 2.997387215296803e-06, "loss": 2.0548, "step": 1600 }, { "epoch": 0.08, "learning_rate": 2.997048339375433e-06, "loss": 2.0709, "step": 1700 }, { "epoch": 0.09, "learning_rate": 2.9966888282124733e-06, "loss": 2.009, "step": 1800 }, { "epoch": 0.09, "learning_rate": 2.9963086867634087e-06, "loss": 1.9616, "step": 1900 }, { "epoch": 0.09, "learning_rate": 2.9959079202680905e-06, "loss": 1.983, "step": 2000 }, { "epoch": 0.1, "learning_rate": 2.9954865342506646e-06, "loss": 2.0902, "step": 2100 }, { "epoch": 0.1, "learning_rate": 2.9950445345194956e-06, "loss": 2.1337, "step": 2200 }, { "epoch": 0.11, "learning_rate": 2.994581927167085e-06, "loss": 1.9246, "step": 2300 }, { "epoch": 0.11, "learning_rate": 2.994098718569992e-06, "loss": 2.0217, "step": 2400 }, { "epoch": 0.12, "learning_rate": 2.9935949153887393e-06, "loss": 2.0509, "step": 2500 }, { "epoch": 0.12, "learning_rate": 2.993070524567726e-06, "loss": 2.013, "step": 2600 }, { "epoch": 0.13, "learning_rate": 2.992525553335129e-06, "loss": 1.8444, "step": 2700 }, { "epoch": 0.13, "learning_rate": 2.991960009202806e-06, "loss": 1.9667, "step": 2800 }, { "epoch": 0.14, "learning_rate": 2.9913738999661895e-06, "loss": 1.9942, "step": 2900 }, { "epoch": 0.14, "learning_rate": 2.990767233704181e-06, "loss": 1.975, "step": 3000 }, { "epoch": 0.15, "learning_rate": 2.9901400187790383e-06, "loss": 2.015, "step": 3100 }, { "epoch": 0.15, "learning_rate": 2.989492263836262e-06, "loss": 2.122, "step": 3200 }, { "epoch": 0.16, "learning_rate": 2.9888239778044748e-06, "loss": 1.8877, "step": 3300 }, { "epoch": 0.16, "learning_rate": 2.988135169895298e-06, "loss": 2.0659, "step": 3400 }, { "epoch": 0.17, "learning_rate": 2.9874258496032273e-06, "loss": 1.8897, "step": 3500 }, { "epoch": 0.17, "learning_rate": 2.9866960267054987e-06, "loss": 1.9466, "step": 3600 }, { "epoch": 0.17, "learning_rate": 2.985945711261956e-06, "loss": 1.9438, "step": 3700 }, { "epoch": 0.18, "learning_rate": 2.9851749136149105e-06, "loss": 2.0251, "step": 3800 }, { "epoch": 0.18, "learning_rate": 2.984383644388999e-06, "loss": 2.0244, "step": 3900 }, { "epoch": 0.19, "learning_rate": 2.9835719144910395e-06, "loss": 1.9022, "step": 4000 }, { "epoch": 0.19, "learning_rate": 2.982739735109876e-06, "loss": 2.0163, "step": 4100 }, { "epoch": 0.2, "learning_rate": 2.98188711771623e-06, "loss": 1.9168, "step": 4200 }, { "epoch": 0.2, "learning_rate": 2.9810140740625364e-06, "loss": 1.9695, "step": 4300 }, { "epoch": 0.21, "learning_rate": 2.9801206161827883e-06, "loss": 1.9114, "step": 4400 }, { "epoch": 0.21, "learning_rate": 2.9792067563923653e-06, "loss": 2.0469, "step": 4500 }, { "epoch": 0.22, "learning_rate": 2.9782725072878657e-06, "loss": 1.8072, "step": 4600 }, { "epoch": 0.22, "learning_rate": 2.9773178817469342e-06, "loss": 1.8899, "step": 4700 }, { "epoch": 0.23, "learning_rate": 2.976342892928083e-06, "loss": 1.9418, "step": 4800 }, { "epoch": 0.23, "learning_rate": 2.9753475542705106e-06, "loss": 2.1559, "step": 4900 }, { "epoch": 0.24, "learning_rate": 2.974331879493916e-06, "loss": 2.001, "step": 5000 }, { "epoch": 0.24, "learning_rate": 2.973295882598313e-06, "loss": 2.051, "step": 5100 }, { "epoch": 0.25, "learning_rate": 2.9722395778638296e-06, "loss": 1.9767, "step": 5200 }, { "epoch": 0.25, "eval_loss": 1.4832066297531128, "eval_runtime": 162.8547, "eval_samples_per_second": 5.704, "eval_steps_per_second": 5.704, "step": 5290 }, { "epoch": 0.25, "learning_rate": 2.971162979850521e-06, "loss": 1.8538, "step": 5300 }, { "epoch": 0.26, "learning_rate": 2.9700661033981615e-06, "loss": 1.7968, "step": 5400 }, { "epoch": 0.26, "learning_rate": 2.9689489636260424e-06, "loss": 1.7703, "step": 5500 }, { "epoch": 0.26, "learning_rate": 2.967811575932764e-06, "loss": 1.9824, "step": 5600 }, { "epoch": 0.27, "learning_rate": 2.9666539559960238e-06, "loss": 1.9332, "step": 5700 }, { "epoch": 0.27, "learning_rate": 2.965476119772398e-06, "loss": 1.8362, "step": 5800 }, { "epoch": 0.28, "learning_rate": 2.964278083497125e-06, "loss": 1.7958, "step": 5900 }, { "epoch": 0.28, "learning_rate": 2.963059863683877e-06, "loss": 1.7677, "step": 6000 }, { "epoch": 0.29, "learning_rate": 2.9618214771245376e-06, "loss": 2.0132, "step": 6100 }, { "epoch": 0.29, "learning_rate": 2.9605629408889673e-06, "loss": 1.8406, "step": 6200 }, { "epoch": 0.3, "learning_rate": 2.9592842723247676e-06, "loss": 2.0235, "step": 6300 }, { "epoch": 0.3, "learning_rate": 2.9579854890570448e-06, "loss": 1.9383, "step": 6400 }, { "epoch": 0.31, "learning_rate": 2.956666608988164e-06, "loss": 1.8556, "step": 6500 }, { "epoch": 0.31, "learning_rate": 2.9553276502975034e-06, "loss": 1.8689, "step": 6600 }, { "epoch": 0.32, "learning_rate": 2.9539686314412053e-06, "loss": 2.0381, "step": 6700 }, { "epoch": 0.32, "learning_rate": 2.9525895711519195e-06, "loss": 2.0205, "step": 6800 }, { "epoch": 0.33, "learning_rate": 2.951190488438546e-06, "loss": 1.8647, "step": 6900 }, { "epoch": 0.33, "learning_rate": 2.9497714025859727e-06, "loss": 1.8074, "step": 7000 }, { "epoch": 0.34, "learning_rate": 2.94833233315481e-06, "loss": 1.9039, "step": 7100 }, { "epoch": 0.34, "learning_rate": 2.9468732999811216e-06, "loss": 2.0103, "step": 7200 }, { "epoch": 0.35, "learning_rate": 2.94539432317615e-06, "loss": 1.9635, "step": 7300 }, { "epoch": 0.35, "learning_rate": 2.943895423126038e-06, "loss": 1.8708, "step": 7400 }, { "epoch": 0.35, "learning_rate": 2.942376620491553e-06, "loss": 1.7572, "step": 7500 }, { "epoch": 0.36, "learning_rate": 2.940837936207796e-06, "loss": 1.9795, "step": 7600 }, { "epoch": 0.36, "learning_rate": 2.9392793914839165e-06, "loss": 2.0192, "step": 7700 }, { "epoch": 0.37, "learning_rate": 2.937701007802819e-06, "loss": 1.8849, "step": 7800 }, { "epoch": 0.37, "learning_rate": 2.9361028069208675e-06, "loss": 1.9925, "step": 7900 }, { "epoch": 0.38, "learning_rate": 2.934484810867586e-06, "loss": 2.004, "step": 8000 }, { "epoch": 0.38, "learning_rate": 2.9328470419453527e-06, "loss": 1.9084, "step": 8100 }, { "epoch": 0.39, "learning_rate": 2.9311895227290954e-06, "loss": 1.8507, "step": 8200 }, { "epoch": 0.39, "learning_rate": 2.929512276065978e-06, "loss": 1.8185, "step": 8300 }, { "epoch": 0.4, "learning_rate": 2.9278153250750875e-06, "loss": 1.7862, "step": 8400 }, { "epoch": 0.4, "learning_rate": 2.9260986931471136e-06, "loss": 1.8444, "step": 8500 }, { "epoch": 0.41, "learning_rate": 2.924362403944027e-06, "loss": 2.0304, "step": 8600 }, { "epoch": 0.41, "learning_rate": 2.922606481398755e-06, "loss": 1.9337, "step": 8700 }, { "epoch": 0.42, "learning_rate": 2.920830949714848e-06, "loss": 1.9937, "step": 8800 }, { "epoch": 0.42, "learning_rate": 2.919035833366148e-06, "loss": 1.9554, "step": 8900 }, { "epoch": 0.43, "learning_rate": 2.917221157096452e-06, "loss": 1.9068, "step": 9000 }, { "epoch": 0.43, "learning_rate": 2.9153869459191693e-06, "loss": 1.9063, "step": 9100 }, { "epoch": 0.43, "learning_rate": 2.913533225116978e-06, "loss": 1.9342, "step": 9200 }, { "epoch": 0.44, "learning_rate": 2.9116600202414754e-06, "loss": 2.0052, "step": 9300 }, { "epoch": 0.44, "learning_rate": 2.9097673571128266e-06, "loss": 1.8102, "step": 9400 }, { "epoch": 0.45, "learning_rate": 2.9078552618194086e-06, "loss": 1.9959, "step": 9500 }, { "epoch": 0.45, "learning_rate": 2.9059237607174494e-06, "loss": 1.9136, "step": 9600 }, { "epoch": 0.46, "learning_rate": 2.9039728804306666e-06, "loss": 1.9124, "step": 9700 }, { "epoch": 0.46, "learning_rate": 2.9020026478498988e-06, "loss": 1.9215, "step": 9800 }, { "epoch": 0.47, "learning_rate": 2.9000130901327377e-06, "loss": 1.93, "step": 9900 }, { "epoch": 0.47, "learning_rate": 2.8980042347031482e-06, "loss": 1.82, "step": 10000 }, { "epoch": 0.48, "learning_rate": 2.8959761092510978e-06, "loss": 1.8436, "step": 10100 }, { "epoch": 0.48, "learning_rate": 2.8939287417321676e-06, "loss": 1.8995, "step": 10200 }, { "epoch": 0.49, "learning_rate": 2.8918621603671737e-06, "loss": 1.9337, "step": 10300 }, { "epoch": 0.49, "learning_rate": 2.8897763936417715e-06, "loss": 1.9088, "step": 10400 }, { "epoch": 0.5, "learning_rate": 2.88767147030607e-06, "loss": 1.8474, "step": 10500 }, { "epoch": 0.5, "eval_loss": 1.4355759620666504, "eval_runtime": 163.1072, "eval_samples_per_second": 5.696, "eval_steps_per_second": 5.696, "step": 10580 }, { "epoch": 0.5, "learning_rate": 2.885547419374229e-06, "loss": 1.9638, "step": 10600 }, { "epoch": 0.51, "learning_rate": 2.883404270124063e-06, "loss": 1.9945, "step": 10700 }, { "epoch": 0.51, "learning_rate": 2.881242052096638e-06, "loss": 1.8143, "step": 10800 }, { "epoch": 0.52, "learning_rate": 2.879060795095863e-06, "loss": 1.7915, "step": 10900 }, { "epoch": 0.52, "learning_rate": 2.8768605291880767e-06, "loss": 1.8868, "step": 11000 }, { "epoch": 0.52, "learning_rate": 2.8746412847016387e-06, "loss": 1.8033, "step": 11100 }, { "epoch": 0.53, "learning_rate": 2.8724030922265068e-06, "loss": 2.0053, "step": 11200 }, { "epoch": 0.53, "learning_rate": 2.870145982613818e-06, "loss": 1.867, "step": 11300 }, { "epoch": 0.54, "learning_rate": 2.867869986975461e-06, "loss": 1.8002, "step": 11400 }, { "epoch": 0.54, "learning_rate": 2.865575136683649e-06, "loss": 1.8835, "step": 11500 }, { "epoch": 0.55, "learning_rate": 2.863261463370487e-06, "loss": 1.7312, "step": 11600 }, { "epoch": 0.55, "learning_rate": 2.8609289989275353e-06, "loss": 1.8402, "step": 11700 }, { "epoch": 0.56, "learning_rate": 2.858577775505371e-06, "loss": 1.9007, "step": 11800 }, { "epoch": 0.56, "learning_rate": 2.856207825513144e-06, "loss": 1.8235, "step": 11900 }, { "epoch": 0.57, "learning_rate": 2.853819181618129e-06, "loss": 1.8568, "step": 12000 }, { "epoch": 0.57, "learning_rate": 2.851411876745278e-06, "loss": 1.9159, "step": 12100 }, { "epoch": 0.58, "learning_rate": 2.848985944076763e-06, "loss": 1.9857, "step": 12200 }, { "epoch": 0.58, "learning_rate": 2.846541417051524e-06, "loss": 1.8676, "step": 12300 }, { "epoch": 0.59, "learning_rate": 2.8440783293648015e-06, "loss": 1.8022, "step": 12400 }, { "epoch": 0.59, "learning_rate": 2.8415967149676773e-06, "loss": 1.8365, "step": 12500 }, { "epoch": 0.6, "learning_rate": 2.8390966080666035e-06, "loss": 1.8702, "step": 12600 }, { "epoch": 0.6, "learning_rate": 2.8365780431229317e-06, "loss": 1.8221, "step": 12700 }, { "epoch": 0.61, "learning_rate": 2.8340410548524395e-06, "loss": 1.8498, "step": 12800 }, { "epoch": 0.61, "learning_rate": 2.8314856782248494e-06, "loss": 1.8906, "step": 12900 }, { "epoch": 0.61, "learning_rate": 2.8289119484633485e-06, "loss": 2.0184, "step": 13000 }, { "epoch": 0.62, "learning_rate": 2.8263199010441038e-06, "loss": 1.8205, "step": 13100 }, { "epoch": 0.62, "learning_rate": 2.82370957169577e-06, "loss": 1.9686, "step": 13200 }, { "epoch": 0.63, "learning_rate": 2.8210809963990004e-06, "loss": 1.7651, "step": 13300 }, { "epoch": 0.63, "learning_rate": 2.8184342113859494e-06, "loss": 1.8216, "step": 13400 }, { "epoch": 0.64, "learning_rate": 2.815769253139773e-06, "loss": 1.8081, "step": 13500 }, { "epoch": 0.64, "learning_rate": 2.813086158394126e-06, "loss": 1.7233, "step": 13600 }, { "epoch": 0.65, "learning_rate": 2.8103849641326563e-06, "loss": 1.8446, "step": 13700 }, { "epoch": 0.65, "learning_rate": 2.807665707588494e-06, "loss": 1.8379, "step": 13800 }, { "epoch": 0.66, "learning_rate": 2.8049284262437393e-06, "loss": 1.8149, "step": 13900 }, { "epoch": 0.66, "learning_rate": 2.802173157828946e-06, "loss": 1.9463, "step": 14000 }, { "epoch": 0.67, "learning_rate": 2.799399940322599e-06, "loss": 1.8382, "step": 14100 }, { "epoch": 0.67, "learning_rate": 2.7966088119505945e-06, "loss": 1.8039, "step": 14200 }, { "epoch": 0.68, "learning_rate": 2.79379981118571e-06, "loss": 2.0244, "step": 14300 }, { "epoch": 0.68, "learning_rate": 2.7909729767470757e-06, "loss": 1.8587, "step": 14400 }, { "epoch": 0.69, "learning_rate": 2.7881283475996405e-06, "loss": 1.8551, "step": 14500 }, { "epoch": 0.69, "learning_rate": 2.7852659629536335e-06, "loss": 1.9153, "step": 14600 }, { "epoch": 0.69, "learning_rate": 2.782385862264027e-06, "loss": 1.7548, "step": 14700 }, { "epoch": 0.7, "learning_rate": 2.779488085229987e-06, "loss": 1.8052, "step": 14800 }, { "epoch": 0.7, "learning_rate": 2.7765726717943334e-06, "loss": 1.7594, "step": 14900 }, { "epoch": 0.71, "learning_rate": 2.773639662142983e-06, "loss": 1.8186, "step": 15000 }, { "epoch": 0.71, "learning_rate": 2.770689096704397e-06, "loss": 1.9036, "step": 15100 }, { "epoch": 0.72, "learning_rate": 2.7677210161490276e-06, "loss": 1.8217, "step": 15200 }, { "epoch": 0.72, "learning_rate": 2.7647354613887523e-06, "loss": 1.8397, "step": 15300 }, { "epoch": 0.73, "learning_rate": 2.761732473576313e-06, "loss": 1.7251, "step": 15400 }, { "epoch": 0.73, "learning_rate": 2.7587120941047475e-06, "loss": 1.8731, "step": 15500 }, { "epoch": 0.74, "learning_rate": 2.7556743646068202e-06, "loss": 1.805, "step": 15600 }, { "epoch": 0.74, "learning_rate": 2.752619326954447e-06, "loss": 1.8677, "step": 15700 }, { "epoch": 0.75, "learning_rate": 2.749547023258118e-06, "loss": 1.8121, "step": 15800 }, { "epoch": 0.75, "eval_loss": 1.4021737575531006, "eval_runtime": 163.1438, "eval_samples_per_second": 5.694, "eval_steps_per_second": 5.694, "step": 15870 }, { "epoch": 0.75, "learning_rate": 2.7464574958663186e-06, "loss": 1.8015, "step": 15900 }, { "epoch": 0.76, "learning_rate": 2.743350787364944e-06, "loss": 1.7014, "step": 16000 }, { "epoch": 0.76, "learning_rate": 2.7402269405767133e-06, "loss": 1.7616, "step": 16100 }, { "epoch": 0.77, "learning_rate": 2.7370859985605794e-06, "loss": 1.7529, "step": 16200 }, { "epoch": 0.77, "learning_rate": 2.7339280046111336e-06, "loss": 1.7992, "step": 16300 }, { "epoch": 0.78, "learning_rate": 2.7307530022580115e-06, "loss": 1.5267, "step": 16400 }, { "epoch": 0.78, "learning_rate": 2.7275610352652913e-06, "loss": 1.6973, "step": 16500 }, { "epoch": 0.78, "learning_rate": 2.7243521476308908e-06, "loss": 1.813, "step": 16600 }, { "epoch": 0.79, "learning_rate": 2.721126383585962e-06, "loss": 1.842, "step": 16700 }, { "epoch": 0.79, "learning_rate": 2.7178837875942787e-06, "loss": 1.9349, "step": 16800 }, { "epoch": 0.8, "learning_rate": 2.7146244043516273e-06, "loss": 1.7218, "step": 16900 }, { "epoch": 0.8, "learning_rate": 2.7113482787851883e-06, "loss": 1.8096, "step": 17000 }, { "epoch": 0.81, "learning_rate": 2.7080554560529164e-06, "loss": 1.7827, "step": 17100 }, { "epoch": 0.81, "learning_rate": 2.7047459815429214e-06, "loss": 1.7434, "step": 17200 }, { "epoch": 0.82, "learning_rate": 2.7014199008728377e-06, "loss": 1.8203, "step": 17300 }, { "epoch": 0.82, "learning_rate": 2.698077259889201e-06, "loss": 1.7201, "step": 17400 }, { "epoch": 0.83, "learning_rate": 2.6947181046668113e-06, "loss": 1.8474, "step": 17500 }, { "epoch": 0.83, "learning_rate": 2.691342481508102e-06, "loss": 1.7868, "step": 17600 }, { "epoch": 0.84, "learning_rate": 2.6879504369424983e-06, "loss": 1.7272, "step": 17700 }, { "epoch": 0.84, "learning_rate": 2.6845420177257774e-06, "loss": 1.8764, "step": 17800 }, { "epoch": 0.85, "learning_rate": 2.6811172708394243e-06, "loss": 1.5964, "step": 17900 }, { "epoch": 0.85, "learning_rate": 2.6776762434899845e-06, "loss": 1.7725, "step": 18000 }, { "epoch": 0.86, "learning_rate": 2.6742189831084106e-06, "loss": 1.8118, "step": 18100 }, { "epoch": 0.86, "learning_rate": 2.6707455373494125e-06, "loss": 1.6714, "step": 18200 }, { "epoch": 0.86, "learning_rate": 2.667255954090798e-06, "loss": 1.7673, "step": 18300 }, { "epoch": 0.87, "learning_rate": 2.6637502814328124e-06, "loss": 1.8517, "step": 18400 }, { "epoch": 0.87, "learning_rate": 2.6602285676974786e-06, "loss": 1.7459, "step": 18500 }, { "epoch": 0.88, "learning_rate": 2.6566908614279262e-06, "loss": 1.8677, "step": 18600 }, { "epoch": 0.88, "learning_rate": 2.6531372113877273e-06, "loss": 1.8378, "step": 18700 }, { "epoch": 0.89, "learning_rate": 2.649567666560222e-06, "loss": 1.7712, "step": 18800 }, { "epoch": 0.89, "learning_rate": 2.645982276147842e-06, "loss": 1.7846, "step": 18900 }, { "epoch": 0.9, "learning_rate": 2.6423810895714345e-06, "loss": 1.7452, "step": 19000 }, { "epoch": 0.9, "learning_rate": 2.6387641564695807e-06, "loss": 1.8064, "step": 19100 }, { "epoch": 0.91, "learning_rate": 2.635131526697911e-06, "loss": 1.6403, "step": 19200 }, { "epoch": 0.91, "learning_rate": 2.631483250328417e-06, "loss": 1.7232, "step": 19300 }, { "epoch": 0.92, "learning_rate": 2.627819377648764e-06, "loss": 1.8836, "step": 19400 }, { "epoch": 0.92, "learning_rate": 2.6241399591615938e-06, "loss": 1.8373, "step": 19500 }, { "epoch": 0.93, "learning_rate": 2.620445045583833e-06, "loss": 1.7807, "step": 19600 }, { "epoch": 0.93, "learning_rate": 2.6167346878459907e-06, "loss": 1.8299, "step": 19700 }, { "epoch": 0.94, "learning_rate": 2.6130089370914575e-06, "loss": 1.8572, "step": 19800 }, { "epoch": 0.94, "learning_rate": 2.609267844675801e-06, "loss": 1.651, "step": 19900 }, { "epoch": 0.95, "learning_rate": 2.605511462166057e-06, "loss": 1.8989, "step": 20000 }, { "epoch": 0.95, "learning_rate": 2.6017398413400198e-06, "loss": 1.8421, "step": 20100 }, { "epoch": 0.95, "learning_rate": 2.597953034185528e-06, "loss": 1.8114, "step": 20200 }, { "epoch": 0.96, "learning_rate": 2.5941510928997473e-06, "loss": 1.8759, "step": 20300 }, { "epoch": 0.96, "learning_rate": 2.590334069888451e-06, "loss": 1.8544, "step": 20400 }, { "epoch": 0.97, "learning_rate": 2.5865020177652995e-06, "loss": 1.717, "step": 20500 }, { "epoch": 0.97, "learning_rate": 2.5826549893511133e-06, "loss": 1.7786, "step": 20600 }, { "epoch": 0.98, "learning_rate": 2.578793037673145e-06, "loss": 1.6818, "step": 20700 }, { "epoch": 0.98, "learning_rate": 2.574916215964348e-06, "loss": 1.6679, "step": 20800 }, { "epoch": 0.99, "learning_rate": 2.5710245776626463e-06, "loss": 1.8773, "step": 20900 }, { "epoch": 0.99, "learning_rate": 2.5671181764101916e-06, "loss": 1.6672, "step": 21000 }, { "epoch": 1.0, "learning_rate": 2.56319706605263e-06, "loss": 1.8333, "step": 21100 }, { "epoch": 1.0, "eval_loss": 1.367815613746643, "eval_runtime": 161.7042, "eval_samples_per_second": 5.745, "eval_steps_per_second": 5.745, "step": 21160 }, { "epoch": 1.0, "learning_rate": 2.5592613006383554e-06, "loss": 1.873, "step": 21200 }, { "epoch": 1.01, "learning_rate": 2.5553109344177676e-06, "loss": 1.7398, "step": 21300 }, { "epoch": 1.01, "learning_rate": 2.5513460218425225e-06, "loss": 1.8562, "step": 21400 }, { "epoch": 1.02, "learning_rate": 2.5473666175647824e-06, "loss": 1.8687, "step": 21500 }, { "epoch": 1.02, "learning_rate": 2.543372776436463e-06, "loss": 1.8159, "step": 21600 }, { "epoch": 1.03, "learning_rate": 2.539364553508476e-06, "loss": 1.7736, "step": 21700 }, { "epoch": 1.03, "learning_rate": 2.5353420040299714e-06, "loss": 1.8746, "step": 21800 }, { "epoch": 1.04, "learning_rate": 2.531305183447576e-06, "loss": 1.7582, "step": 21900 }, { "epoch": 1.04, "learning_rate": 2.527254147404629e-06, "loss": 1.9113, "step": 22000 }, { "epoch": 1.04, "learning_rate": 2.5231889517404136e-06, "loss": 1.8019, "step": 22100 }, { "epoch": 1.05, "learning_rate": 2.5191096524893894e-06, "loss": 1.8494, "step": 22200 }, { "epoch": 1.05, "learning_rate": 2.5150163058804203e-06, "loss": 1.698, "step": 22300 }, { "epoch": 1.06, "learning_rate": 2.5109089683359967e-06, "loss": 1.7218, "step": 22400 }, { "epoch": 1.06, "learning_rate": 2.5067876964714582e-06, "loss": 1.7944, "step": 22500 }, { "epoch": 1.07, "learning_rate": 2.502652547094218e-06, "loss": 1.8057, "step": 22600 }, { "epoch": 1.07, "learning_rate": 2.4985035772029737e-06, "loss": 1.677, "step": 22700 }, { "epoch": 1.08, "learning_rate": 2.4943408439869243e-06, "loss": 1.8319, "step": 22800 }, { "epoch": 1.08, "learning_rate": 2.490164404824983e-06, "loss": 1.742, "step": 22900 }, { "epoch": 1.09, "learning_rate": 2.485974317284983e-06, "loss": 1.7521, "step": 23000 }, { "epoch": 1.09, "learning_rate": 2.4817706391228884e-06, "loss": 1.8927, "step": 23100 }, { "epoch": 1.1, "learning_rate": 2.4775534282819945e-06, "loss": 1.6825, "step": 23200 }, { "epoch": 1.1, "learning_rate": 2.473322742892131e-06, "loss": 1.7289, "step": 23300 }, { "epoch": 1.11, "learning_rate": 2.4690786412688594e-06, "loss": 1.8572, "step": 23400 }, { "epoch": 1.11, "learning_rate": 2.4648211819126706e-06, "loss": 1.7959, "step": 23500 }, { "epoch": 1.12, "learning_rate": 2.460550423508178e-06, "loss": 1.765, "step": 23600 }, { "epoch": 1.12, "learning_rate": 2.4562664249233064e-06, "loss": 1.7334, "step": 23700 }, { "epoch": 1.12, "learning_rate": 2.451969245208486e-06, "loss": 1.6651, "step": 23800 }, { "epoch": 1.13, "learning_rate": 2.4476589435958323e-06, "loss": 1.7472, "step": 23900 }, { "epoch": 1.13, "learning_rate": 2.4433355794983336e-06, "loss": 1.8278, "step": 24000 }, { "epoch": 1.14, "learning_rate": 2.43899921250903e-06, "loss": 1.6537, "step": 24100 }, { "epoch": 1.14, "learning_rate": 2.4346499024001946e-06, "loss": 1.6281, "step": 24200 }, { "epoch": 1.15, "learning_rate": 2.430287709122506e-06, "loss": 1.8405, "step": 24300 }, { "epoch": 1.15, "learning_rate": 2.425912692804224e-06, "loss": 1.7661, "step": 24400 }, { "epoch": 1.16, "learning_rate": 2.4215249137503624e-06, "loss": 1.7644, "step": 24500 }, { "epoch": 1.16, "learning_rate": 2.417124432441853e-06, "loss": 1.6826, "step": 24600 }, { "epoch": 1.17, "learning_rate": 2.412711309534717e-06, "loss": 1.7262, "step": 24700 }, { "epoch": 1.17, "learning_rate": 2.4082856058592265e-06, "loss": 1.8845, "step": 24800 }, { "epoch": 1.18, "learning_rate": 2.4038473824190656e-06, "loss": 1.922, "step": 24900 }, { "epoch": 1.18, "learning_rate": 2.399396700390491e-06, "loss": 1.703, "step": 25000 }, { "epoch": 1.19, "learning_rate": 2.394933621121487e-06, "loss": 1.83, "step": 25100 }, { "epoch": 1.19, "learning_rate": 2.3904582061309217e-06, "loss": 1.6753, "step": 25200 }, { "epoch": 1.2, "learning_rate": 2.3859705171076983e-06, "loss": 1.8203, "step": 25300 }, { "epoch": 1.2, "learning_rate": 2.3814706159099038e-06, "loss": 1.7362, "step": 25400 }, { "epoch": 1.21, "learning_rate": 2.376958564563958e-06, "loss": 1.8836, "step": 25500 }, { "epoch": 1.21, "learning_rate": 2.372434425263757e-06, "loss": 1.7072, "step": 25600 }, { "epoch": 1.21, "learning_rate": 2.367898260369818e-06, "loss": 1.6916, "step": 25700 }, { "epoch": 1.22, "learning_rate": 2.3633501324084165e-06, "loss": 1.6549, "step": 25800 }, { "epoch": 1.22, "learning_rate": 2.358790104070728e-06, "loss": 1.7526, "step": 25900 }, { "epoch": 1.23, "learning_rate": 2.354218238211962e-06, "loss": 1.7785, "step": 26000 }, { "epoch": 1.23, "learning_rate": 2.349634597850495e-06, "loss": 1.7332, "step": 26100 }, { "epoch": 1.24, "learning_rate": 2.3450392461670026e-06, "loss": 1.7434, "step": 26200 }, { "epoch": 1.24, "learning_rate": 2.3404322465035903e-06, "loss": 1.8742, "step": 26300 }, { "epoch": 1.25, "learning_rate": 2.3358136623629167e-06, "loss": 1.6601, "step": 26400 }, { "epoch": 1.25, "eval_loss": 1.3507641553878784, "eval_runtime": 162.7404, "eval_samples_per_second": 5.708, "eval_steps_per_second": 5.708, "step": 26450 }, { "epoch": 1.25, "learning_rate": 2.331183557407322e-06, "loss": 1.7639, "step": 26500 }, { "epoch": 1.26, "learning_rate": 2.3265419954579467e-06, "loss": 1.849, "step": 26600 }, { "epoch": 1.26, "learning_rate": 2.321889040493856e-06, "loss": 1.9006, "step": 26700 }, { "epoch": 1.27, "learning_rate": 2.317224756651156e-06, "loss": 1.6524, "step": 26800 }, { "epoch": 1.27, "learning_rate": 2.3125492082221074e-06, "loss": 1.8237, "step": 26900 }, { "epoch": 1.28, "learning_rate": 2.307862459654243e-06, "loss": 1.7348, "step": 27000 }, { "epoch": 1.28, "learning_rate": 2.303164575549478e-06, "loss": 1.6887, "step": 27100 }, { "epoch": 1.29, "learning_rate": 2.298455620663217e-06, "loss": 1.7558, "step": 27200 }, { "epoch": 1.29, "learning_rate": 2.293735659903468e-06, "loss": 1.8181, "step": 27300 }, { "epoch": 1.3, "learning_rate": 2.2890047583299385e-06, "loss": 1.7344, "step": 27400 }, { "epoch": 1.3, "learning_rate": 2.284262981153147e-06, "loss": 1.8456, "step": 27500 }, { "epoch": 1.3, "learning_rate": 2.27951039373352e-06, "loss": 1.711, "step": 27600 }, { "epoch": 1.31, "learning_rate": 2.2747470615804907e-06, "loss": 1.7673, "step": 27700 }, { "epoch": 1.31, "learning_rate": 2.269973050351599e-06, "loss": 1.7957, "step": 27800 }, { "epoch": 1.32, "learning_rate": 2.265188425851583e-06, "loss": 1.6838, "step": 27900 }, { "epoch": 1.32, "learning_rate": 2.260393254031475e-06, "loss": 1.6342, "step": 28000 }, { "epoch": 1.33, "learning_rate": 2.2555876009876904e-06, "loss": 1.8296, "step": 28100 }, { "epoch": 1.33, "learning_rate": 2.250771532961118e-06, "loss": 1.7831, "step": 28200 }, { "epoch": 1.34, "learning_rate": 2.2459451163362036e-06, "loss": 1.7551, "step": 28300 }, { "epoch": 1.34, "learning_rate": 2.241108417640041e-06, "loss": 1.708, "step": 28400 }, { "epoch": 1.35, "learning_rate": 2.2362615035414496e-06, "loss": 1.7695, "step": 28500 }, { "epoch": 1.35, "learning_rate": 2.231404440850058e-06, "loss": 1.6231, "step": 28600 }, { "epoch": 1.36, "learning_rate": 2.2265372965153827e-06, "loss": 1.7269, "step": 28700 }, { "epoch": 1.36, "learning_rate": 2.2216601376259044e-06, "loss": 1.6641, "step": 28800 }, { "epoch": 1.37, "learning_rate": 2.2167730314081447e-06, "loss": 1.7724, "step": 28900 }, { "epoch": 1.37, "learning_rate": 2.211876045225738e-06, "loss": 1.909, "step": 29000 }, { "epoch": 1.38, "learning_rate": 2.2069692465785034e-06, "loss": 1.7163, "step": 29100 }, { "epoch": 1.38, "learning_rate": 2.202052703101516e-06, "loss": 1.857, "step": 29200 }, { "epoch": 1.38, "learning_rate": 2.1971264825641716e-06, "loss": 1.6806, "step": 29300 }, { "epoch": 1.39, "learning_rate": 2.1921906528692556e-06, "loss": 1.7828, "step": 29400 }, { "epoch": 1.39, "learning_rate": 2.187245282052004e-06, "loss": 1.7669, "step": 29500 }, { "epoch": 1.4, "learning_rate": 2.1822904382791686e-06, "loss": 1.7001, "step": 29600 }, { "epoch": 1.4, "learning_rate": 2.1773261898480747e-06, "loss": 1.6504, "step": 29700 }, { "epoch": 1.41, "learning_rate": 2.172352605185682e-06, "loss": 1.6888, "step": 29800 }, { "epoch": 1.41, "learning_rate": 2.167369752847639e-06, "loss": 1.6804, "step": 29900 }, { "epoch": 1.42, "learning_rate": 2.162377701517341e-06, "loss": 1.5615, "step": 30000 }, { "epoch": 1.42, "learning_rate": 2.1573765200049817e-06, "loss": 1.6089, "step": 30100 }, { "epoch": 1.43, "learning_rate": 2.1523662772466025e-06, "loss": 1.7575, "step": 30200 }, { "epoch": 1.43, "learning_rate": 2.1473470423031475e-06, "loss": 1.6443, "step": 30300 }, { "epoch": 1.44, "learning_rate": 2.1423188843595067e-06, "loss": 1.6201, "step": 30400 }, { "epoch": 1.44, "learning_rate": 2.1372818727235653e-06, "loss": 1.7594, "step": 30500 }, { "epoch": 1.45, "learning_rate": 2.132236076825247e-06, "loss": 1.6505, "step": 30600 }, { "epoch": 1.45, "learning_rate": 2.127181566215557e-06, "loss": 1.8139, "step": 30700 }, { "epoch": 1.46, "learning_rate": 2.122118410565624e-06, "loss": 1.738, "step": 30800 }, { "epoch": 1.46, "learning_rate": 2.11704667966574e-06, "loss": 1.693, "step": 30900 }, { "epoch": 1.47, "learning_rate": 2.111966443424397e-06, "loss": 1.8003, "step": 31000 }, { "epoch": 1.47, "learning_rate": 2.1068777718673254e-06, "loss": 1.8407, "step": 31100 }, { "epoch": 1.47, "learning_rate": 2.101780735136526e-06, "loss": 1.5816, "step": 31200 }, { "epoch": 1.48, "learning_rate": 2.0966754034893047e-06, "loss": 1.6609, "step": 31300 }, { "epoch": 1.48, "learning_rate": 2.0915618472973062e-06, "loss": 1.7292, "step": 31400 }, { "epoch": 1.49, "learning_rate": 2.0864401370455406e-06, "loss": 1.7347, "step": 31500 }, { "epoch": 1.49, "learning_rate": 2.081310343331413e-06, "loss": 1.748, "step": 31600 }, { "epoch": 1.5, "learning_rate": 2.0761725368637496e-06, "loss": 1.5452, "step": 31700 }, { "epoch": 1.5, "eval_loss": 1.3357341289520264, "eval_runtime": 162.3538, "eval_samples_per_second": 5.722, "eval_steps_per_second": 5.722, "step": 31740 }, { "epoch": 1.5, "learning_rate": 2.0710267884618273e-06, "loss": 1.6686, "step": 31800 }, { "epoch": 1.51, "learning_rate": 2.0658731690543905e-06, "loss": 1.72, "step": 31900 }, { "epoch": 1.51, "learning_rate": 2.0607117496786794e-06, "loss": 1.7252, "step": 32000 }, { "epoch": 1.52, "learning_rate": 2.0555426014794477e-06, "loss": 1.6562, "step": 32100 }, { "epoch": 1.52, "learning_rate": 2.050365795707983e-06, "loss": 1.6878, "step": 32200 }, { "epoch": 1.53, "learning_rate": 2.0451814037211256e-06, "loss": 1.7308, "step": 32300 }, { "epoch": 1.53, "learning_rate": 2.0399894969802814e-06, "loss": 1.6544, "step": 32400 }, { "epoch": 1.54, "learning_rate": 2.034790147050442e-06, "loss": 1.7115, "step": 32500 }, { "epoch": 1.54, "learning_rate": 2.0295834255991927e-06, "loss": 1.8076, "step": 32600 }, { "epoch": 1.55, "learning_rate": 2.024369404395731e-06, "loss": 1.6923, "step": 32700 }, { "epoch": 1.55, "learning_rate": 2.01914815530987e-06, "loss": 1.8198, "step": 32800 }, { "epoch": 1.56, "learning_rate": 2.013919750311055e-06, "loss": 1.5914, "step": 32900 }, { "epoch": 1.56, "learning_rate": 2.008684261467365e-06, "loss": 1.7334, "step": 33000 }, { "epoch": 1.56, "learning_rate": 2.003441760944525e-06, "loss": 1.6914, "step": 33100 }, { "epoch": 1.57, "learning_rate": 1.998192321004908e-06, "loss": 1.5967, "step": 33200 }, { "epoch": 1.57, "learning_rate": 1.992936014006538e-06, "loss": 1.6271, "step": 33300 }, { "epoch": 1.58, "learning_rate": 1.9876729124020963e-06, "loss": 1.5439, "step": 33400 }, { "epoch": 1.58, "learning_rate": 1.982403088737918e-06, "loss": 1.5242, "step": 33500 }, { "epoch": 1.59, "learning_rate": 1.977126615652999e-06, "loss": 1.7863, "step": 33600 }, { "epoch": 1.59, "learning_rate": 1.9718435658779864e-06, "loss": 1.7852, "step": 33700 }, { "epoch": 1.6, "learning_rate": 1.9665540122341817e-06, "loss": 1.7474, "step": 33800 }, { "epoch": 1.6, "learning_rate": 1.9612580276325363e-06, "loss": 1.818, "step": 33900 }, { "epoch": 1.61, "learning_rate": 1.9559556850726433e-06, "loss": 1.8187, "step": 34000 }, { "epoch": 1.61, "learning_rate": 1.9506470576417362e-06, "loss": 1.6308, "step": 34100 }, { "epoch": 1.62, "learning_rate": 1.9453322185136772e-06, "loss": 1.5877, "step": 34200 }, { "epoch": 1.62, "learning_rate": 1.9400112409479507e-06, "loss": 1.5775, "step": 34300 }, { "epoch": 1.63, "learning_rate": 1.9346841982886527e-06, "loss": 1.6369, "step": 34400 }, { "epoch": 1.63, "learning_rate": 1.929351163963481e-06, "loss": 1.7436, "step": 34500 }, { "epoch": 1.64, "learning_rate": 1.924012211482721e-06, "loss": 1.7817, "step": 34600 }, { "epoch": 1.64, "learning_rate": 1.918667414438235e-06, "loss": 1.7958, "step": 34700 }, { "epoch": 1.64, "learning_rate": 1.9133168465024454e-06, "loss": 1.6632, "step": 34800 }, { "epoch": 1.65, "learning_rate": 1.907960581427321e-06, "loss": 1.7518, "step": 34900 }, { "epoch": 1.65, "learning_rate": 1.9025986930433594e-06, "loss": 1.7184, "step": 35000 }, { "epoch": 1.66, "learning_rate": 1.8972312552585695e-06, "loss": 1.6154, "step": 35100 }, { "epoch": 1.66, "learning_rate": 1.891858342057453e-06, "loss": 1.7069, "step": 35200 }, { "epoch": 1.67, "learning_rate": 1.8864800274999842e-06, "loss": 1.6902, "step": 35300 }, { "epoch": 1.67, "learning_rate": 1.8810963857205902e-06, "loss": 1.6736, "step": 35400 }, { "epoch": 1.68, "learning_rate": 1.8757074909271275e-06, "loss": 1.7893, "step": 35500 }, { "epoch": 1.68, "learning_rate": 1.8703134173998603e-06, "loss": 1.7374, "step": 35600 }, { "epoch": 1.69, "learning_rate": 1.864914239490436e-06, "loss": 1.7173, "step": 35700 }, { "epoch": 1.69, "learning_rate": 1.8595100316208608e-06, "loss": 1.6844, "step": 35800 }, { "epoch": 1.7, "learning_rate": 1.854100868282473e-06, "loss": 1.6794, "step": 35900 }, { "epoch": 1.7, "learning_rate": 1.8486868240349173e-06, "loss": 1.65, "step": 36000 }, { "epoch": 1.71, "learning_rate": 1.8432679735051177e-06, "loss": 1.6641, "step": 36100 }, { "epoch": 1.71, "learning_rate": 1.8378443913862453e-06, "loss": 1.6942, "step": 36200 }, { "epoch": 1.72, "learning_rate": 1.8324161524366935e-06, "loss": 1.782, "step": 36300 }, { "epoch": 1.72, "learning_rate": 1.8269833314790437e-06, "loss": 1.5728, "step": 36400 }, { "epoch": 1.73, "learning_rate": 1.8215460033990368e-06, "loss": 1.6751, "step": 36500 }, { "epoch": 1.73, "learning_rate": 1.8161042431445376e-06, "loss": 1.5691, "step": 36600 }, { "epoch": 1.73, "learning_rate": 1.8106581257245064e-06, "loss": 1.7601, "step": 36700 }, { "epoch": 1.74, "learning_rate": 1.8052077262079612e-06, "loss": 1.6157, "step": 36800 }, { "epoch": 1.74, "learning_rate": 1.799753119722943e-06, "loss": 1.7615, "step": 36900 }, { "epoch": 1.75, "learning_rate": 1.7942943814554837e-06, "loss": 1.7381, "step": 37000 }, { "epoch": 1.75, "eval_loss": 1.319101095199585, "eval_runtime": 162.3139, "eval_samples_per_second": 5.723, "eval_steps_per_second": 5.723, "step": 37030 }, { "epoch": 1.75, "learning_rate": 1.7888315866485659e-06, "loss": 1.7177, "step": 37100 }, { "epoch": 1.76, "learning_rate": 1.7833648106010884e-06, "loss": 1.7527, "step": 37200 }, { "epoch": 1.76, "learning_rate": 1.7778941286668257e-06, "loss": 1.6938, "step": 37300 }, { "epoch": 1.77, "learning_rate": 1.772419616253393e-06, "loss": 1.7706, "step": 37400 }, { "epoch": 1.77, "learning_rate": 1.7669413488212027e-06, "loss": 1.6078, "step": 37500 }, { "epoch": 1.78, "learning_rate": 1.761459401882427e-06, "loss": 1.6867, "step": 37600 }, { "epoch": 1.78, "learning_rate": 1.755973850999957e-06, "loss": 1.6677, "step": 37700 }, { "epoch": 1.79, "learning_rate": 1.750484771786358e-06, "loss": 1.6582, "step": 37800 }, { "epoch": 1.79, "learning_rate": 1.7449922399028333e-06, "loss": 1.6047, "step": 37900 }, { "epoch": 1.8, "learning_rate": 1.7394963310581735e-06, "loss": 1.8746, "step": 38000 }, { "epoch": 1.8, "learning_rate": 1.733997121007721e-06, "loss": 1.549, "step": 38100 }, { "epoch": 1.81, "learning_rate": 1.7284946855523186e-06, "loss": 1.7323, "step": 38200 }, { "epoch": 1.81, "learning_rate": 1.7229891005372704e-06, "loss": 1.734, "step": 38300 }, { "epoch": 1.82, "learning_rate": 1.7174804418512918e-06, "loss": 1.6329, "step": 38400 }, { "epoch": 1.82, "learning_rate": 1.7119687854254674e-06, "loss": 1.5707, "step": 38500 }, { "epoch": 1.82, "learning_rate": 1.7064542072322015e-06, "loss": 1.7011, "step": 38600 }, { "epoch": 1.83, "learning_rate": 1.7009367832841715e-06, "loss": 1.6164, "step": 38700 }, { "epoch": 1.83, "learning_rate": 1.6954165896332817e-06, "loss": 1.6312, "step": 38800 }, { "epoch": 1.84, "learning_rate": 1.6898937023696123e-06, "loss": 1.7649, "step": 38900 }, { "epoch": 1.84, "learning_rate": 1.6843681976203744e-06, "loss": 1.6634, "step": 39000 }, { "epoch": 1.85, "learning_rate": 1.6788401515488557e-06, "loss": 1.6431, "step": 39100 }, { "epoch": 1.85, "learning_rate": 1.673309640353376e-06, "loss": 1.7147, "step": 39200 }, { "epoch": 1.86, "learning_rate": 1.6677767402662318e-06, "loss": 1.881, "step": 39300 }, { "epoch": 1.86, "learning_rate": 1.6622415275526502e-06, "loss": 1.6384, "step": 39400 }, { "epoch": 1.87, "learning_rate": 1.6567040785097333e-06, "loss": 1.6662, "step": 39500 }, { "epoch": 1.87, "learning_rate": 1.6511644694654109e-06, "loss": 1.6323, "step": 39600 }, { "epoch": 1.88, "learning_rate": 1.6456227767773842e-06, "loss": 1.7642, "step": 39700 }, { "epoch": 1.88, "learning_rate": 1.6400790768320761e-06, "loss": 1.6971, "step": 39800 }, { "epoch": 1.89, "learning_rate": 1.6345334460435775e-06, "loss": 1.7224, "step": 39900 }, { "epoch": 1.89, "learning_rate": 1.6289859608525936e-06, "loss": 1.7847, "step": 40000 }, { "epoch": 1.9, "learning_rate": 1.623436697725391e-06, "loss": 1.6998, "step": 40100 }, { "epoch": 1.9, "learning_rate": 1.6178857331527427e-06, "loss": 1.7637, "step": 40200 }, { "epoch": 1.9, "learning_rate": 1.6123331436488752e-06, "loss": 1.738, "step": 40300 }, { "epoch": 1.91, "learning_rate": 1.6067790057504125e-06, "loss": 1.8809, "step": 40400 }, { "epoch": 1.91, "learning_rate": 1.6012233960153213e-06, "loss": 1.6865, "step": 40500 }, { "epoch": 1.92, "learning_rate": 1.5956663910218566e-06, "loss": 1.7502, "step": 40600 }, { "epoch": 1.92, "learning_rate": 1.590108067367505e-06, "loss": 1.7131, "step": 40700 }, { "epoch": 1.93, "learning_rate": 1.58454850166793e-06, "loss": 1.6668, "step": 40800 }, { "epoch": 1.93, "learning_rate": 1.5789877705559149e-06, "loss": 1.6616, "step": 40900 }, { "epoch": 1.94, "learning_rate": 1.573425950680308e-06, "loss": 1.8484, "step": 41000 }, { "epoch": 1.94, "learning_rate": 1.567863118704963e-06, "loss": 1.722, "step": 41100 }, { "epoch": 1.95, "learning_rate": 1.562299351307686e-06, "loss": 1.6145, "step": 41200 }, { "epoch": 1.95, "learning_rate": 1.5567347251791773e-06, "loss": 1.744, "step": 41300 }, { "epoch": 1.96, "learning_rate": 1.5511693170219723e-06, "loss": 1.7476, "step": 41400 }, { "epoch": 1.96, "learning_rate": 1.5456032035493878e-06, "loss": 1.6705, "step": 41500 }, { "epoch": 1.97, "learning_rate": 1.5400364614844604e-06, "loss": 1.5381, "step": 41600 }, { "epoch": 1.97, "learning_rate": 1.5344691675588926e-06, "loss": 1.7072, "step": 41700 }, { "epoch": 1.98, "learning_rate": 1.5289013985119934e-06, "loss": 1.7217, "step": 41800 }, { "epoch": 1.98, "learning_rate": 1.5233332310896214e-06, "loss": 1.6447, "step": 41900 }, { "epoch": 1.99, "learning_rate": 1.5177647420431253e-06, "loss": 1.6961, "step": 42000 }, { "epoch": 1.99, "learning_rate": 1.5121960081282878e-06, "loss": 1.8037, "step": 42100 }, { "epoch": 1.99, "learning_rate": 1.5066271061042672e-06, "loss": 1.6076, "step": 42200 }, { "epoch": 2.0, "learning_rate": 1.5010581127325374e-06, "loss": 1.6256, "step": 42300 }, { "epoch": 2.0, "eval_loss": 1.309001088142395, "eval_runtime": 163.9053, "eval_samples_per_second": 5.668, "eval_steps_per_second": 5.668, "step": 42320 }, { "epoch": 2.0, "learning_rate": 1.4954891047758328e-06, "loss": 1.6049, "step": 42400 }, { "epoch": 2.01, "learning_rate": 1.489920158997089e-06, "loss": 1.5866, "step": 42500 }, { "epoch": 2.01, "learning_rate": 1.4843513521583844e-06, "loss": 1.6174, "step": 42600 }, { "epoch": 2.02, "learning_rate": 1.4787827610198813e-06, "loss": 1.711, "step": 42700 }, { "epoch": 2.02, "learning_rate": 1.4732144623387696e-06, "loss": 1.6283, "step": 42800 }, { "epoch": 2.03, "learning_rate": 1.4676465328682085e-06, "loss": 1.7035, "step": 42900 }, { "epoch": 2.03, "learning_rate": 1.4620790493562662e-06, "loss": 1.6869, "step": 43000 }, { "epoch": 2.04, "learning_rate": 1.4565120885448656e-06, "loss": 1.6827, "step": 43100 }, { "epoch": 2.04, "learning_rate": 1.4509457271687238e-06, "loss": 1.7237, "step": 43200 }, { "epoch": 2.05, "learning_rate": 1.4453800419542962e-06, "loss": 1.6418, "step": 43300 }, { "epoch": 2.05, "learning_rate": 1.4398151096187167e-06, "loss": 1.7514, "step": 43400 }, { "epoch": 2.06, "learning_rate": 1.434251006868743e-06, "loss": 1.7102, "step": 43500 }, { "epoch": 2.06, "learning_rate": 1.4286878103996967e-06, "loss": 1.6147, "step": 43600 }, { "epoch": 2.07, "learning_rate": 1.4231255968944078e-06, "loss": 1.557, "step": 43700 }, { "epoch": 2.07, "learning_rate": 1.4175644430221568e-06, "loss": 1.6971, "step": 43800 }, { "epoch": 2.07, "learning_rate": 1.412004425437619e-06, "loss": 1.6645, "step": 43900 }, { "epoch": 2.08, "learning_rate": 1.4064456207798066e-06, "loss": 1.688, "step": 44000 }, { "epoch": 2.08, "learning_rate": 1.4008881056710125e-06, "loss": 1.7062, "step": 44100 }, { "epoch": 2.09, "learning_rate": 1.3953319567157556e-06, "loss": 1.5745, "step": 44200 }, { "epoch": 2.09, "learning_rate": 1.3897772504997228e-06, "loss": 1.5922, "step": 44300 }, { "epoch": 2.1, "learning_rate": 1.3842240635887154e-06, "loss": 1.7366, "step": 44400 }, { "epoch": 2.1, "learning_rate": 1.3786724725275911e-06, "loss": 1.7974, "step": 44500 }, { "epoch": 2.11, "learning_rate": 1.3731225538392125e-06, "loss": 1.7394, "step": 44600 }, { "epoch": 2.11, "learning_rate": 1.367574384023388e-06, "loss": 1.7766, "step": 44700 }, { "epoch": 2.12, "learning_rate": 1.3620280395558218e-06, "loss": 1.631, "step": 44800 }, { "epoch": 2.12, "learning_rate": 1.3564835968870557e-06, "loss": 1.6251, "step": 44900 }, { "epoch": 2.13, "learning_rate": 1.3509411324414191e-06, "loss": 1.6983, "step": 45000 }, { "epoch": 2.13, "learning_rate": 1.345400722615972e-06, "loss": 1.6382, "step": 45100 }, { "epoch": 2.14, "learning_rate": 1.3398624437794549e-06, "loss": 1.6588, "step": 45200 }, { "epoch": 2.14, "learning_rate": 1.3343263722712342e-06, "loss": 1.8123, "step": 45300 }, { "epoch": 2.15, "learning_rate": 1.3287925844002496e-06, "loss": 1.6796, "step": 45400 }, { "epoch": 2.15, "learning_rate": 1.3232611564439656e-06, "loss": 1.5431, "step": 45500 }, { "epoch": 2.16, "learning_rate": 1.3177321646473154e-06, "loss": 1.57, "step": 45600 }, { "epoch": 2.16, "learning_rate": 1.3122056852216538e-06, "loss": 1.6356, "step": 45700 }, { "epoch": 2.16, "learning_rate": 1.3066817943437054e-06, "loss": 1.6333, "step": 45800 }, { "epoch": 2.17, "learning_rate": 1.3011605681545126e-06, "loss": 1.595, "step": 45900 }, { "epoch": 2.17, "learning_rate": 1.29564208275839e-06, "loss": 1.5615, "step": 46000 }, { "epoch": 2.18, "learning_rate": 1.2901264142218712e-06, "loss": 1.7929, "step": 46100 }, { "epoch": 2.18, "learning_rate": 1.2846136385726644e-06, "loss": 1.8091, "step": 46200 }, { "epoch": 2.19, "learning_rate": 1.2791038317986009e-06, "loss": 1.6715, "step": 46300 }, { "epoch": 2.19, "learning_rate": 1.2735970698465896e-06, "loss": 1.6615, "step": 46400 }, { "epoch": 2.2, "learning_rate": 1.2680934286215696e-06, "loss": 1.6615, "step": 46500 }, { "epoch": 2.2, "learning_rate": 1.2625929839854644e-06, "loss": 1.7039, "step": 46600 }, { "epoch": 2.21, "learning_rate": 1.2570958117561357e-06, "loss": 1.7209, "step": 46700 }, { "epoch": 2.21, "learning_rate": 1.2516019877063388e-06, "loss": 1.7251, "step": 46800 }, { "epoch": 2.22, "learning_rate": 1.2461115875626768e-06, "loss": 1.7202, "step": 46900 }, { "epoch": 2.22, "learning_rate": 1.2406246870045588e-06, "loss": 1.7948, "step": 47000 }, { "epoch": 2.23, "learning_rate": 1.2351413616631561e-06, "loss": 1.6631, "step": 47100 }, { "epoch": 2.23, "learning_rate": 1.2296616871203584e-06, "loss": 1.6321, "step": 47200 }, { "epoch": 2.24, "learning_rate": 1.2241857389077332e-06, "loss": 1.7737, "step": 47300 }, { "epoch": 2.24, "learning_rate": 1.2187135925054852e-06, "loss": 1.5694, "step": 47400 }, { "epoch": 2.25, "learning_rate": 1.2132453233414145e-06, "loss": 1.7562, "step": 47500 }, { "epoch": 2.25, "learning_rate": 1.207781006789877e-06, "loss": 1.5521, "step": 47600 }, { "epoch": 2.25, "eval_loss": 1.2960591316223145, "eval_runtime": 158.903, "eval_samples_per_second": 5.846, "eval_steps_per_second": 5.846, "step": 47610 }, { "epoch": 2.25, "learning_rate": 1.202320718170748e-06, "loss": 1.6698, "step": 47700 }, { "epoch": 2.26, "learning_rate": 1.1968645327483792e-06, "loss": 1.5465, "step": 47800 }, { "epoch": 2.26, "learning_rate": 1.1914125257305654e-06, "loss": 1.6406, "step": 47900 }, { "epoch": 2.27, "learning_rate": 1.1859647722675075e-06, "loss": 1.6434, "step": 48000 }, { "epoch": 2.27, "learning_rate": 1.1805213474507738e-06, "loss": 1.5834, "step": 48100 }, { "epoch": 2.28, "learning_rate": 1.1750823263122683e-06, "loss": 1.683, "step": 48200 }, { "epoch": 2.28, "learning_rate": 1.169647783823193e-06, "loss": 1.5975, "step": 48300 }, { "epoch": 2.29, "learning_rate": 1.1642177948930188e-06, "loss": 1.6729, "step": 48400 }, { "epoch": 2.29, "learning_rate": 1.1587924343684486e-06, "loss": 1.688, "step": 48500 }, { "epoch": 2.3, "learning_rate": 1.1533717770323887e-06, "loss": 1.6362, "step": 48600 }, { "epoch": 2.3, "learning_rate": 1.1479558976029164e-06, "loss": 1.7004, "step": 48700 }, { "epoch": 2.31, "learning_rate": 1.1425448707322505e-06, "loss": 1.6087, "step": 48800 }, { "epoch": 2.31, "learning_rate": 1.137138771005723e-06, "loss": 1.6815, "step": 48900 }, { "epoch": 2.32, "learning_rate": 1.1317376729407493e-06, "loss": 1.5914, "step": 49000 }, { "epoch": 2.32, "learning_rate": 1.1263416509858032e-06, "loss": 1.5619, "step": 49100 }, { "epoch": 2.33, "learning_rate": 1.1209507795193888e-06, "loss": 1.6197, "step": 49200 }, { "epoch": 2.33, "learning_rate": 1.1155651328490174e-06, "loss": 1.6824, "step": 49300 }, { "epoch": 2.33, "learning_rate": 1.11018478521018e-06, "loss": 1.7277, "step": 49400 }, { "epoch": 2.34, "learning_rate": 1.1048098107653282e-06, "loss": 1.6273, "step": 49500 }, { "epoch": 2.34, "learning_rate": 1.0994402836028472e-06, "loss": 1.6803, "step": 49600 }, { "epoch": 2.35, "learning_rate": 1.0940762777360401e-06, "loss": 1.5929, "step": 49700 }, { "epoch": 2.35, "learning_rate": 1.0887178671021024e-06, "loss": 1.6484, "step": 49800 }, { "epoch": 2.36, "learning_rate": 1.0833651255611058e-06, "loss": 1.7423, "step": 49900 }, { "epoch": 2.36, "learning_rate": 1.0780181268949805e-06, "loss": 1.6847, "step": 50000 }, { "epoch": 2.37, "learning_rate": 1.0726769448064956e-06, "loss": 1.6074, "step": 50100 }, { "epoch": 2.37, "learning_rate": 1.0673416529182462e-06, "loss": 1.7478, "step": 50200 }, { "epoch": 2.38, "learning_rate": 1.0620123247716362e-06, "loss": 1.7042, "step": 50300 }, { "epoch": 2.38, "learning_rate": 1.0566890338258655e-06, "loss": 1.6337, "step": 50400 }, { "epoch": 2.39, "learning_rate": 1.0513718534569187e-06, "loss": 1.7174, "step": 50500 }, { "epoch": 2.39, "learning_rate": 1.0460608569565506e-06, "loss": 1.6805, "step": 50600 }, { "epoch": 2.4, "learning_rate": 1.0407561175312802e-06, "loss": 1.5872, "step": 50700 }, { "epoch": 2.4, "learning_rate": 1.035457708301377e-06, "loss": 1.7103, "step": 50800 }, { "epoch": 2.41, "learning_rate": 1.0301657022998575e-06, "loss": 1.7544, "step": 50900 }, { "epoch": 2.41, "learning_rate": 1.0248801724714746e-06, "loss": 1.6165, "step": 51000 }, { "epoch": 2.42, "learning_rate": 1.019601191671715e-06, "loss": 1.5813, "step": 51100 }, { "epoch": 2.42, "learning_rate": 1.0143288326657935e-06, "loss": 1.6332, "step": 51200 }, { "epoch": 2.42, "learning_rate": 1.0090631681276508e-06, "loss": 1.7332, "step": 51300 }, { "epoch": 2.43, "learning_rate": 1.0038042706389505e-06, "loss": 1.5387, "step": 51400 }, { "epoch": 2.43, "learning_rate": 9.985522126880806e-07, "loss": 1.5534, "step": 51500 }, { "epoch": 2.44, "learning_rate": 9.93307066669153e-07, "loss": 1.6457, "step": 51600 }, { "epoch": 2.44, "learning_rate": 9.880689048810049e-07, "loss": 1.6818, "step": 51700 }, { "epoch": 2.45, "learning_rate": 9.828377995262048e-07, "loss": 1.5609, "step": 51800 }, { "epoch": 2.45, "learning_rate": 9.77613822710054e-07, "loss": 1.7747, "step": 51900 }, { "epoch": 2.46, "learning_rate": 9.72397046439596e-07, "loss": 1.7221, "step": 52000 }, { "epoch": 2.46, "learning_rate": 9.671875426226204e-07, "loss": 1.7983, "step": 52100 }, { "epoch": 2.47, "learning_rate": 9.61985383066676e-07, "loss": 1.6314, "step": 52200 }, { "epoch": 2.47, "learning_rate": 9.567906394780763e-07, "loss": 1.6959, "step": 52300 }, { "epoch": 2.48, "learning_rate": 9.516033834609155e-07, "loss": 1.6105, "step": 52400 }, { "epoch": 2.48, "learning_rate": 9.464236865160779e-07, "loss": 1.573, "step": 52500 }, { "epoch": 2.49, "learning_rate": 9.412516200402556e-07, "loss": 1.6789, "step": 52600 }, { "epoch": 2.49, "learning_rate": 9.360872553249605e-07, "loss": 1.7057, "step": 52700 }, { "epoch": 2.5, "learning_rate": 9.30930663555545e-07, "loss": 1.6102, "step": 52800 }, { "epoch": 2.5, "learning_rate": 9.257819158102203e-07, "loss": 1.8318, "step": 52900 }, { "epoch": 2.5, "eval_loss": 1.2909756898880005, "eval_runtime": 158.004, "eval_samples_per_second": 5.88, "eval_steps_per_second": 5.88, "step": 52900 }, { "epoch": 2.51, "learning_rate": 9.206410830590746e-07, "loss": 1.6514, "step": 53000 }, { "epoch": 2.51, "learning_rate": 9.15508236163097e-07, "loss": 1.7379, "step": 53100 }, { "epoch": 2.51, "learning_rate": 9.103834458732002e-07, "loss": 1.6323, "step": 53200 }, { "epoch": 2.52, "learning_rate": 9.052667828292439e-07, "loss": 1.8245, "step": 53300 }, { "epoch": 2.52, "learning_rate": 9.001583175590636e-07, "loss": 1.5375, "step": 53400 }, { "epoch": 2.53, "learning_rate": 8.950581204774961e-07, "loss": 1.737, "step": 53500 }, { "epoch": 2.53, "learning_rate": 8.899662618854105e-07, "loss": 1.6755, "step": 53600 }, { "epoch": 2.54, "learning_rate": 8.848828119687375e-07, "loss": 1.6737, "step": 53700 }, { "epoch": 2.54, "learning_rate": 8.798078407975051e-07, "loss": 1.7876, "step": 53800 }, { "epoch": 2.55, "learning_rate": 8.747414183248682e-07, "loss": 1.6804, "step": 53900 }, { "epoch": 2.55, "learning_rate": 8.696836143861491e-07, "loss": 1.5951, "step": 54000 }, { "epoch": 2.56, "learning_rate": 8.646344986978708e-07, "loss": 1.6206, "step": 54100 }, { "epoch": 2.56, "learning_rate": 8.595941408567983e-07, "loss": 1.7823, "step": 54200 }, { "epoch": 2.57, "learning_rate": 8.545626103389805e-07, "loss": 1.6832, "step": 54300 }, { "epoch": 2.57, "learning_rate": 8.495399764987894e-07, "loss": 1.6455, "step": 54400 }, { "epoch": 2.58, "learning_rate": 8.445263085679645e-07, "loss": 1.6894, "step": 54500 }, { "epoch": 2.58, "learning_rate": 8.395216756546627e-07, "loss": 1.5944, "step": 54600 }, { "epoch": 2.59, "learning_rate": 8.345261467425003e-07, "loss": 1.7441, "step": 54700 }, { "epoch": 2.59, "learning_rate": 8.295397906896052e-07, "loss": 1.7046, "step": 54800 }, { "epoch": 2.59, "learning_rate": 8.245626762276663e-07, "loss": 1.6335, "step": 54900 }, { "epoch": 2.6, "learning_rate": 8.195948719609889e-07, "loss": 1.7515, "step": 55000 }, { "epoch": 2.6, "learning_rate": 8.146364463655458e-07, "loss": 1.6208, "step": 55100 }, { "epoch": 2.61, "learning_rate": 8.096874677880322e-07, "loss": 1.6655, "step": 55200 }, { "epoch": 2.61, "learning_rate": 8.047480044449309e-07, "loss": 1.7218, "step": 55300 }, { "epoch": 2.62, "learning_rate": 7.998181244215638e-07, "loss": 1.5814, "step": 55400 }, { "epoch": 2.62, "learning_rate": 7.948978956711576e-07, "loss": 1.7588, "step": 55500 }, { "epoch": 2.63, "learning_rate": 7.899873860139058e-07, "loss": 1.6841, "step": 55600 }, { "epoch": 2.63, "learning_rate": 7.850866631360363e-07, "loss": 1.6321, "step": 55700 }, { "epoch": 2.64, "learning_rate": 7.801957945888744e-07, "loss": 1.654, "step": 55800 }, { "epoch": 2.64, "learning_rate": 7.75314847787914e-07, "loss": 1.6165, "step": 55900 }, { "epoch": 2.65, "learning_rate": 7.704438900118902e-07, "loss": 1.7136, "step": 56000 }, { "epoch": 2.65, "learning_rate": 7.655829884018475e-07, "loss": 1.6892, "step": 56100 }, { "epoch": 2.66, "learning_rate": 7.607322099602175e-07, "loss": 1.6254, "step": 56200 }, { "epoch": 2.66, "learning_rate": 7.558916215498944e-07, "loss": 1.5811, "step": 56300 }, { "epoch": 2.67, "learning_rate": 7.510612898933145e-07, "loss": 1.6081, "step": 56400 }, { "epoch": 2.67, "learning_rate": 7.462412815715343e-07, "loss": 1.5603, "step": 56500 }, { "epoch": 2.68, "learning_rate": 7.414316630233144e-07, "loss": 1.7405, "step": 56600 }, { "epoch": 2.68, "learning_rate": 7.366325005442026e-07, "loss": 1.6653, "step": 56700 }, { "epoch": 2.68, "learning_rate": 7.318438602856225e-07, "loss": 1.6596, "step": 56800 }, { "epoch": 2.69, "learning_rate": 7.270658082539581e-07, "loss": 1.706, "step": 56900 }, { "epoch": 2.69, "learning_rate": 7.222984103096469e-07, "loss": 1.718, "step": 57000 }, { "epoch": 2.7, "learning_rate": 7.175417321662698e-07, "loss": 1.6861, "step": 57100 }, { "epoch": 2.7, "learning_rate": 7.127958393896484e-07, "loss": 1.668, "step": 57200 }, { "epoch": 2.71, "learning_rate": 7.080607973969376e-07, "loss": 1.7527, "step": 57300 }, { "epoch": 2.71, "learning_rate": 7.033366714557257e-07, "loss": 1.7254, "step": 57400 }, { "epoch": 2.72, "learning_rate": 6.986235266831368e-07, "loss": 1.5732, "step": 57500 }, { "epoch": 2.72, "learning_rate": 6.93921428044928e-07, "loss": 1.6163, "step": 57600 }, { "epoch": 2.73, "learning_rate": 6.892304403545984e-07, "loss": 1.7492, "step": 57700 }, { "epoch": 2.73, "learning_rate": 6.845506282724956e-07, "loss": 1.7095, "step": 57800 }, { "epoch": 2.74, "learning_rate": 6.798820563049212e-07, "loss": 1.7914, "step": 57900 }, { "epoch": 2.74, "learning_rate": 6.75224788803245e-07, "loss": 1.6378, "step": 58000 }, { "epoch": 2.75, "learning_rate": 6.70578889963015e-07, "loss": 1.6761, "step": 58100 }, { "epoch": 2.75, "eval_loss": 1.2901337146759033, "eval_runtime": 158.1238, "eval_samples_per_second": 5.875, "eval_steps_per_second": 5.875, "step": 58190 }, { "epoch": 2.75, "learning_rate": 6.659444238230763e-07, "loss": 1.6017, "step": 58200 }, { "epoch": 2.76, "learning_rate": 6.613214542646845e-07, "loss": 1.5221, "step": 58300 }, { "epoch": 2.76, "learning_rate": 6.567100450106276e-07, "loss": 1.7276, "step": 58400 }, { "epoch": 2.77, "learning_rate": 6.521102596243459e-07, "loss": 1.5169, "step": 58500 }, { "epoch": 2.77, "learning_rate": 6.475221615090591e-07, "loss": 1.7469, "step": 58600 }, { "epoch": 2.77, "learning_rate": 6.429458139068882e-07, "loss": 1.646, "step": 58700 }, { "epoch": 2.78, "learning_rate": 6.383812798979856e-07, "loss": 1.6483, "step": 58800 }, { "epoch": 2.78, "learning_rate": 6.338286223996673e-07, "loss": 1.5527, "step": 58900 }, { "epoch": 2.79, "learning_rate": 6.29287904165543e-07, "loss": 1.6215, "step": 59000 }, { "epoch": 2.79, "learning_rate": 6.247591877846517e-07, "loss": 1.6239, "step": 59100 }, { "epoch": 2.8, "learning_rate": 6.202425356805997e-07, "loss": 1.6994, "step": 59200 }, { "epoch": 2.8, "learning_rate": 6.157380101107016e-07, "loss": 1.5472, "step": 59300 }, { "epoch": 2.81, "learning_rate": 6.112456731651181e-07, "loss": 1.589, "step": 59400 }, { "epoch": 2.81, "learning_rate": 6.067655867660037e-07, "loss": 1.6836, "step": 59500 }, { "epoch": 2.82, "learning_rate": 6.022978126666509e-07, "loss": 1.6906, "step": 59600 }, { "epoch": 2.82, "learning_rate": 5.978424124506421e-07, "loss": 1.7639, "step": 59700 }, { "epoch": 2.83, "learning_rate": 5.933994475309969e-07, "loss": 1.6307, "step": 59800 }, { "epoch": 2.83, "learning_rate": 5.889689791493279e-07, "loss": 1.6508, "step": 59900 }, { "epoch": 2.84, "learning_rate": 5.84551068374996e-07, "loss": 1.6107, "step": 60000 }, { "epoch": 2.84, "learning_rate": 5.801457761042689e-07, "loss": 1.6451, "step": 60100 }, { "epoch": 2.85, "learning_rate": 5.757531630594812e-07, "loss": 1.6345, "step": 60200 }, { "epoch": 2.85, "learning_rate": 5.71373289788197e-07, "loss": 1.5496, "step": 60300 }, { "epoch": 2.85, "learning_rate": 5.670062166623781e-07, "loss": 1.6161, "step": 60400 }, { "epoch": 2.86, "learning_rate": 5.626520038775476e-07, "loss": 1.618, "step": 60500 }, { "epoch": 2.86, "learning_rate": 5.583107114519624e-07, "loss": 1.5446, "step": 60600 }, { "epoch": 2.87, "learning_rate": 5.539823992257877e-07, "loss": 1.6561, "step": 60700 }, { "epoch": 2.87, "learning_rate": 5.496671268602682e-07, "loss": 1.7354, "step": 60800 }, { "epoch": 2.88, "learning_rate": 5.453649538369088e-07, "loss": 1.5153, "step": 60900 }, { "epoch": 2.88, "learning_rate": 5.410759394566529e-07, "loss": 1.6056, "step": 61000 }, { "epoch": 2.89, "learning_rate": 5.368001428390672e-07, "loss": 1.674, "step": 61100 }, { "epoch": 2.89, "learning_rate": 5.325376229215244e-07, "loss": 1.6993, "step": 61200 }, { "epoch": 2.9, "learning_rate": 5.282884384583917e-07, "loss": 1.6882, "step": 61300 }, { "epoch": 2.9, "learning_rate": 5.240526480202211e-07, "loss": 1.5872, "step": 61400 }, { "epoch": 2.91, "learning_rate": 5.198303099929429e-07, "loss": 1.6554, "step": 61500 }, { "epoch": 2.91, "learning_rate": 5.156214825770591e-07, "loss": 1.6168, "step": 61600 }, { "epoch": 2.92, "learning_rate": 5.114262237868423e-07, "loss": 1.5752, "step": 61700 }, { "epoch": 2.92, "learning_rate": 5.072445914495355e-07, "loss": 1.655, "step": 61800 }, { "epoch": 2.93, "learning_rate": 5.030766432045565e-07, "loss": 1.6429, "step": 61900 }, { "epoch": 2.93, "learning_rate": 4.989224365027009e-07, "loss": 1.6156, "step": 62000 }, { "epoch": 2.94, "learning_rate": 4.947820286053518e-07, "loss": 1.6634, "step": 62100 }, { "epoch": 2.94, "learning_rate": 4.906554765836916e-07, "loss": 1.7337, "step": 62200 }, { "epoch": 2.94, "learning_rate": 4.865428373179121e-07, "loss": 1.6085, "step": 62300 }, { "epoch": 2.95, "learning_rate": 4.824441674964334e-07, "loss": 1.6445, "step": 62400 }, { "epoch": 2.95, "learning_rate": 4.783595236151211e-07, "loss": 1.7347, "step": 62500 }, { "epoch": 2.96, "learning_rate": 4.7428896197650816e-07, "loss": 1.5851, "step": 62600 }, { "epoch": 2.96, "learning_rate": 4.702325386890184e-07, "loss": 1.6059, "step": 62700 }, { "epoch": 2.97, "learning_rate": 4.661903096661929e-07, "loss": 1.6562, "step": 62800 }, { "epoch": 2.97, "learning_rate": 4.6216233062592107e-07, "loss": 1.6983, "step": 62900 }, { "epoch": 2.98, "learning_rate": 4.581486570896701e-07, "loss": 1.7001, "step": 63000 }, { "epoch": 2.98, "learning_rate": 4.541493443817206e-07, "loss": 1.5994, "step": 63100 }, { "epoch": 2.99, "learning_rate": 4.501644476284045e-07, "loss": 1.6582, "step": 63200 }, { "epoch": 2.99, "learning_rate": 4.4619402175734606e-07, "loss": 1.7147, "step": 63300 }, { "epoch": 3.0, "learning_rate": 4.4223812149670195e-07, "loss": 1.6312, "step": 63400 }, { "epoch": 3.0, "eval_loss": 1.287863850593567, "eval_runtime": 157.981, "eval_samples_per_second": 5.88, "eval_steps_per_second": 5.88, "step": 63480 }, { "epoch": 3.0, "learning_rate": 4.3829680137440883e-07, "loss": 1.5559, "step": 63500 }, { "epoch": 3.01, "learning_rate": 4.343701157174329e-07, "loss": 1.6739, "step": 63600 }, { "epoch": 3.01, "learning_rate": 4.3045811865101767e-07, "loss": 1.4717, "step": 63700 }, { "epoch": 3.02, "learning_rate": 4.265608640979411e-07, "loss": 1.6384, "step": 63800 }, { "epoch": 3.02, "learning_rate": 4.226784057777699e-07, "loss": 1.5138, "step": 63900 }, { "epoch": 3.03, "learning_rate": 4.1881079720612204e-07, "loss": 1.5968, "step": 64000 }, { "epoch": 3.03, "learning_rate": 4.149580916939255e-07, "loss": 1.5826, "step": 64100 }, { "epoch": 3.03, "learning_rate": 4.1112034234668615e-07, "loss": 1.7272, "step": 64200 }, { "epoch": 3.04, "learning_rate": 4.0729760206375404e-07, "loss": 1.722, "step": 64300 }, { "epoch": 3.04, "learning_rate": 4.0348992353759657e-07, "loss": 1.6016, "step": 64400 }, { "epoch": 3.05, "learning_rate": 3.9969735925306884e-07, "loss": 1.5948, "step": 64500 }, { "epoch": 3.05, "learning_rate": 3.95919961486693e-07, "loss": 1.4295, "step": 64600 }, { "epoch": 3.06, "learning_rate": 3.9215778230593563e-07, "loss": 1.6671, "step": 64700 }, { "epoch": 3.06, "learning_rate": 3.8841087356849295e-07, "loss": 1.6863, "step": 64800 }, { "epoch": 3.07, "learning_rate": 3.846792869215725e-07, "loss": 1.7321, "step": 64900 }, { "epoch": 3.07, "learning_rate": 3.8096307380118334e-07, "loss": 1.6549, "step": 65000 } ], "logging_steps": 100, "max_steps": 84628, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 5000, "total_flos": 3.849368373382349e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }