{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 1312119, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999999809468501e-05, "loss": 11.0183, "step": 1 }, { "epoch": 0.0, "learning_rate": 4.999904734250476e-05, "loss": 8.1808, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.999809468500952e-05, "loss": 7.4836, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.999714202751428e-05, "loss": 7.0935, "step": 1500 }, { "epoch": 0.0, "learning_rate": 4.999618937001903e-05, "loss": 6.8477, "step": 2000 }, { "epoch": 0.0, "learning_rate": 4.9995236712523794e-05, "loss": 6.6176, "step": 2500 }, { "epoch": 0.0, "learning_rate": 4.999428596034354e-05, "loss": 6.4164, "step": 3000 }, { "epoch": 0.0, "learning_rate": 4.9993333302848294e-05, "loss": 6.2092, "step": 3500 }, { "epoch": 0.0, "learning_rate": 4.999238064535305e-05, "loss": 6.0244, "step": 4000 }, { "epoch": 0.0, "learning_rate": 4.999142798785781e-05, "loss": 5.9575, "step": 4500 }, { "epoch": 0.0, "learning_rate": 4.999047533036257e-05, "loss": 5.7715, "step": 5000 }, { "epoch": 0.0, "learning_rate": 4.998952267286733e-05, "loss": 5.6966, "step": 5500 }, { "epoch": 0.0, "learning_rate": 4.9988570015372085e-05, "loss": 5.516, "step": 6000 }, { "epoch": 0.0, "learning_rate": 4.998761735787684e-05, "loss": 5.4432, "step": 6500 }, { "epoch": 0.01, "learning_rate": 4.9986664700381595e-05, "loss": 5.3452, "step": 7000 }, { "epoch": 0.01, "learning_rate": 4.998571204288636e-05, "loss": 5.21, "step": 7500 }, { "epoch": 0.01, "learning_rate": 4.998475938539112e-05, "loss": 5.1467, "step": 8000 }, { "epoch": 0.01, "learning_rate": 4.998380672789587e-05, "loss": 5.0636, "step": 8500 }, { "epoch": 0.01, "learning_rate": 4.998285597571562e-05, "loss": 4.989, "step": 9000 }, { "epoch": 0.01, "learning_rate": 4.9981905223535366e-05, "loss": 4.8955, "step": 9500 }, { "epoch": 0.01, "learning_rate": 4.9980952566040125e-05, "loss": 4.8076, "step": 10000 }, { "epoch": 0.01, "learning_rate": 4.997999990854488e-05, "loss": 4.7676, "step": 10500 }, { "epoch": 0.01, "learning_rate": 4.9979047251049634e-05, "loss": 4.7127, "step": 11000 }, { "epoch": 0.01, "learning_rate": 4.997809649886939e-05, "loss": 4.5816, "step": 11500 }, { "epoch": 0.01, "learning_rate": 4.997714384137415e-05, "loss": 4.507, "step": 12000 }, { "epoch": 0.01, "learning_rate": 4.99761911838789e-05, "loss": 4.4491, "step": 12500 }, { "epoch": 0.01, "learning_rate": 4.997523852638366e-05, "loss": 4.4011, "step": 13000 }, { "epoch": 0.01, "learning_rate": 4.997428586888842e-05, "loss": 4.3559, "step": 13500 }, { "epoch": 0.01, "learning_rate": 4.9973333211393174e-05, "loss": 4.3054, "step": 14000 }, { "epoch": 0.01, "learning_rate": 4.997238055389793e-05, "loss": 4.276, "step": 14500 }, { "epoch": 0.01, "learning_rate": 4.997142789640269e-05, "loss": 4.2676, "step": 15000 }, { "epoch": 0.01, "learning_rate": 4.997047523890745e-05, "loss": 4.165, "step": 15500 }, { "epoch": 0.01, "learning_rate": 4.996952258141221e-05, "loss": 4.1877, "step": 16000 }, { "epoch": 0.01, "learning_rate": 4.9968569923916966e-05, "loss": 4.1484, "step": 16500 }, { "epoch": 0.01, "learning_rate": 4.9967617266421724e-05, "loss": 4.1325, "step": 17000 }, { "epoch": 0.01, "learning_rate": 4.996666651424147e-05, "loss": 4.0963, "step": 17500 }, { "epoch": 0.01, "learning_rate": 4.9965713856746224e-05, "loss": 4.0634, "step": 18000 }, { "epoch": 0.01, "learning_rate": 4.996476119925099e-05, "loss": 4.0009, "step": 18500 }, { "epoch": 0.01, "learning_rate": 4.996381044707074e-05, "loss": 3.9726, "step": 19000 }, { "epoch": 0.01, "learning_rate": 4.996285778957549e-05, "loss": 3.9568, "step": 19500 }, { "epoch": 0.02, "learning_rate": 4.996190513208025e-05, "loss": 3.9486, "step": 20000 }, { "epoch": 0.02, "learning_rate": 4.9960952474585005e-05, "loss": 3.9449, "step": 20500 }, { "epoch": 0.02, "learning_rate": 4.995999981708976e-05, "loss": 3.9204, "step": 21000 }, { "epoch": 0.02, "learning_rate": 4.995904715959452e-05, "loss": 3.9092, "step": 21500 }, { "epoch": 0.02, "learning_rate": 4.995809450209928e-05, "loss": 3.8644, "step": 22000 }, { "epoch": 0.02, "learning_rate": 4.995714374991903e-05, "loss": 3.8551, "step": 22500 }, { "epoch": 0.02, "learning_rate": 4.995619109242378e-05, "loss": 3.8465, "step": 23000 }, { "epoch": 0.02, "learning_rate": 4.9955238434928545e-05, "loss": 3.8054, "step": 23500 }, { "epoch": 0.02, "learning_rate": 4.99542857774333e-05, "loss": 3.8097, "step": 24000 }, { "epoch": 0.02, "learning_rate": 4.9953333119938055e-05, "loss": 3.7812, "step": 24500 }, { "epoch": 0.02, "learning_rate": 4.995238046244281e-05, "loss": 3.7919, "step": 25000 }, { "epoch": 0.02, "learning_rate": 4.995142780494757e-05, "loss": 3.7534, "step": 25500 }, { "epoch": 0.02, "learning_rate": 4.995047514745233e-05, "loss": 3.7539, "step": 26000 }, { "epoch": 0.02, "learning_rate": 4.994952439527208e-05, "loss": 3.7195, "step": 26500 }, { "epoch": 0.02, "learning_rate": 4.9948571737776836e-05, "loss": 3.7117, "step": 27000 }, { "epoch": 0.02, "learning_rate": 4.9947620985596584e-05, "loss": 3.7003, "step": 27500 }, { "epoch": 0.02, "learning_rate": 4.994666832810134e-05, "loss": 3.6942, "step": 28000 }, { "epoch": 0.02, "learning_rate": 4.9945715670606094e-05, "loss": 3.671, "step": 28500 }, { "epoch": 0.02, "learning_rate": 4.994476301311086e-05, "loss": 3.68, "step": 29000 }, { "epoch": 0.02, "learning_rate": 4.994381035561561e-05, "loss": 3.6613, "step": 29500 }, { "epoch": 0.02, "learning_rate": 4.994285769812037e-05, "loss": 3.6584, "step": 30000 }, { "epoch": 0.02, "eval_accuracy": 0.40299365911369156, "eval_loss": 3.563416004180908, "eval_runtime": 9454.6759, "eval_samples_per_second": 29.085, "eval_steps_per_second": 7.271, "step": 30000 }, { "epoch": 0.02, "learning_rate": 4.9941905040625134e-05, "loss": 3.6474, "step": 30500 }, { "epoch": 0.02, "learning_rate": 4.9940952383129886e-05, "loss": 3.6035, "step": 31000 }, { "epoch": 0.02, "learning_rate": 4.9939999725634644e-05, "loss": 3.6543, "step": 31500 }, { "epoch": 0.02, "learning_rate": 4.99390470681394e-05, "loss": 3.6094, "step": 32000 }, { "epoch": 0.02, "learning_rate": 4.993809631595915e-05, "loss": 3.6101, "step": 32500 }, { "epoch": 0.03, "learning_rate": 4.99371455637789e-05, "loss": 3.5973, "step": 33000 }, { "epoch": 0.03, "learning_rate": 4.993619290628366e-05, "loss": 3.6071, "step": 33500 }, { "epoch": 0.03, "learning_rate": 4.9935240248788415e-05, "loss": 3.5643, "step": 34000 }, { "epoch": 0.03, "learning_rate": 4.9934287591293174e-05, "loss": 3.5414, "step": 34500 }, { "epoch": 0.03, "learning_rate": 4.9933334933797925e-05, "loss": 3.54, "step": 35000 }, { "epoch": 0.03, "learning_rate": 4.993238227630268e-05, "loss": 3.5529, "step": 35500 }, { "epoch": 0.03, "learning_rate": 4.993143152412244e-05, "loss": 3.5395, "step": 36000 }, { "epoch": 0.03, "learning_rate": 4.993047886662719e-05, "loss": 3.5193, "step": 36500 }, { "epoch": 0.03, "learning_rate": 4.992952620913195e-05, "loss": 3.5087, "step": 37000 }, { "epoch": 0.03, "learning_rate": 4.9928573551636706e-05, "loss": 3.4989, "step": 37500 }, { "epoch": 0.03, "learning_rate": 4.9927620894141465e-05, "loss": 3.5179, "step": 38000 }, { "epoch": 0.03, "learning_rate": 4.992666823664622e-05, "loss": 3.5175, "step": 38500 }, { "epoch": 0.03, "learning_rate": 4.9925715579150975e-05, "loss": 3.4805, "step": 39000 }, { "epoch": 0.03, "learning_rate": 4.992476482697073e-05, "loss": 3.4935, "step": 39500 }, { "epoch": 0.03, "learning_rate": 4.992381216947549e-05, "loss": 3.4869, "step": 40000 }, { "epoch": 0.03, "learning_rate": 4.992285951198024e-05, "loss": 3.4929, "step": 40500 }, { "epoch": 0.03, "learning_rate": 4.9921906854485e-05, "loss": 3.4931, "step": 41000 }, { "epoch": 0.03, "learning_rate": 4.9920954196989756e-05, "loss": 3.4775, "step": 41500 }, { "epoch": 0.03, "learning_rate": 4.9920003444809504e-05, "loss": 3.4333, "step": 42000 }, { "epoch": 0.03, "learning_rate": 4.991905078731426e-05, "loss": 3.4168, "step": 42500 }, { "epoch": 0.03, "learning_rate": 4.991809812981902e-05, "loss": 3.4649, "step": 43000 }, { "epoch": 0.03, "learning_rate": 4.991714547232378e-05, "loss": 3.4142, "step": 43500 }, { "epoch": 0.03, "learning_rate": 4.991619472014353e-05, "loss": 3.4316, "step": 44000 }, { "epoch": 0.03, "learning_rate": 4.991524206264828e-05, "loss": 3.4588, "step": 44500 }, { "epoch": 0.03, "learning_rate": 4.9914289405153044e-05, "loss": 3.4399, "step": 45000 }, { "epoch": 0.03, "learning_rate": 4.9913336747657796e-05, "loss": 3.4236, "step": 45500 }, { "epoch": 0.04, "learning_rate": 4.9912384090162554e-05, "loss": 3.3971, "step": 46000 }, { "epoch": 0.04, "learning_rate": 4.991143143266732e-05, "loss": 3.4369, "step": 46500 }, { "epoch": 0.04, "learning_rate": 4.991047877517207e-05, "loss": 3.384, "step": 47000 }, { "epoch": 0.04, "learning_rate": 4.990952611767683e-05, "loss": 3.3818, "step": 47500 }, { "epoch": 0.04, "learning_rate": 4.990857346018159e-05, "loss": 3.3393, "step": 48000 }, { "epoch": 0.04, "learning_rate": 4.9907620802686345e-05, "loss": 3.3585, "step": 48500 }, { "epoch": 0.04, "learning_rate": 4.9906670050506094e-05, "loss": 3.3456, "step": 49000 }, { "epoch": 0.04, "learning_rate": 4.9905717393010845e-05, "loss": 3.4045, "step": 49500 }, { "epoch": 0.04, "learning_rate": 4.99047666408306e-05, "loss": 3.3669, "step": 50000 }, { "epoch": 0.04, "learning_rate": 4.990381398333536e-05, "loss": 3.3354, "step": 50500 }, { "epoch": 0.04, "learning_rate": 4.990286132584011e-05, "loss": 3.3635, "step": 51000 }, { "epoch": 0.04, "learning_rate": 4.990190866834487e-05, "loss": 3.3211, "step": 51500 }, { "epoch": 0.04, "learning_rate": 4.9900956010849626e-05, "loss": 3.3472, "step": 52000 }, { "epoch": 0.04, "learning_rate": 4.9900003353354385e-05, "loss": 3.3663, "step": 52500 }, { "epoch": 0.04, "learning_rate": 4.989905260117413e-05, "loss": 3.3315, "step": 53000 }, { "epoch": 0.04, "learning_rate": 4.989809994367889e-05, "loss": 3.3305, "step": 53500 }, { "epoch": 0.04, "learning_rate": 4.989714728618365e-05, "loss": 3.3257, "step": 54000 }, { "epoch": 0.04, "learning_rate": 4.989619462868841e-05, "loss": 3.2911, "step": 54500 }, { "epoch": 0.04, "learning_rate": 4.989524387650815e-05, "loss": 3.3134, "step": 55000 }, { "epoch": 0.04, "learning_rate": 4.9894291219012914e-05, "loss": 3.3043, "step": 55500 }, { "epoch": 0.04, "learning_rate": 4.989333856151767e-05, "loss": 3.291, "step": 56000 }, { "epoch": 0.04, "learning_rate": 4.9892385904022424e-05, "loss": 3.3177, "step": 56500 }, { "epoch": 0.04, "learning_rate": 4.989143324652719e-05, "loss": 3.3043, "step": 57000 }, { "epoch": 0.04, "learning_rate": 4.989048058903194e-05, "loss": 3.2928, "step": 57500 }, { "epoch": 0.04, "learning_rate": 4.98895279315367e-05, "loss": 3.2675, "step": 58000 }, { "epoch": 0.04, "learning_rate": 4.988857527404146e-05, "loss": 3.2713, "step": 58500 }, { "epoch": 0.04, "learning_rate": 4.9887624521861206e-05, "loss": 3.2884, "step": 59000 }, { "epoch": 0.05, "learning_rate": 4.9886671864365964e-05, "loss": 3.304, "step": 59500 }, { "epoch": 0.05, "learning_rate": 4.9885719206870716e-05, "loss": 3.2648, "step": 60000 }, { "epoch": 0.05, "eval_accuracy": 0.4417787159886874, "eval_loss": 3.2018163204193115, "eval_runtime": 9409.4577, "eval_samples_per_second": 29.225, "eval_steps_per_second": 7.306, "step": 60000 }, { "epoch": 0.05, "learning_rate": 4.988476654937548e-05, "loss": 3.2698, "step": 60500 }, { "epoch": 0.05, "learning_rate": 4.988381389188024e-05, "loss": 3.2763, "step": 61000 }, { "epoch": 0.05, "learning_rate": 4.988286123438499e-05, "loss": 3.2728, "step": 61500 }, { "epoch": 0.05, "learning_rate": 4.988190857688975e-05, "loss": 3.259, "step": 62000 }, { "epoch": 0.05, "learning_rate": 4.9880957824709504e-05, "loss": 3.256, "step": 62500 }, { "epoch": 0.05, "learning_rate": 4.9880005167214255e-05, "loss": 3.2937, "step": 63000 }, { "epoch": 0.05, "learning_rate": 4.9879052509719014e-05, "loss": 3.2598, "step": 63500 }, { "epoch": 0.05, "learning_rate": 4.987809985222377e-05, "loss": 3.2054, "step": 64000 }, { "epoch": 0.05, "learning_rate": 4.987714719472853e-05, "loss": 3.2119, "step": 64500 }, { "epoch": 0.05, "learning_rate": 4.987619453723329e-05, "loss": 3.2475, "step": 65000 }, { "epoch": 0.05, "learning_rate": 4.987524187973805e-05, "loss": 3.2348, "step": 65500 }, { "epoch": 0.05, "learning_rate": 4.9874291127557795e-05, "loss": 3.2456, "step": 66000 }, { "epoch": 0.05, "learning_rate": 4.987333847006255e-05, "loss": 3.2274, "step": 66500 }, { "epoch": 0.05, "learning_rate": 4.9872385812567305e-05, "loss": 3.2269, "step": 67000 }, { "epoch": 0.05, "learning_rate": 4.987143315507207e-05, "loss": 3.2275, "step": 67500 }, { "epoch": 0.05, "learning_rate": 4.987048049757682e-05, "loss": 3.2366, "step": 68000 }, { "epoch": 0.05, "learning_rate": 4.986952784008158e-05, "loss": 3.2054, "step": 68500 }, { "epoch": 0.05, "learning_rate": 4.986857708790133e-05, "loss": 3.2311, "step": 69000 }, { "epoch": 0.05, "learning_rate": 4.9867624430406086e-05, "loss": 3.1811, "step": 69500 }, { "epoch": 0.05, "learning_rate": 4.9866671772910844e-05, "loss": 3.1802, "step": 70000 }, { "epoch": 0.05, "learning_rate": 4.9865719115415596e-05, "loss": 3.2048, "step": 70500 }, { "epoch": 0.05, "learning_rate": 4.986476645792036e-05, "loss": 3.2102, "step": 71000 }, { "epoch": 0.05, "learning_rate": 4.986381380042512e-05, "loss": 3.1934, "step": 71500 }, { "epoch": 0.05, "learning_rate": 4.986286114292987e-05, "loss": 3.1902, "step": 72000 }, { "epoch": 0.06, "learning_rate": 4.9861908485434636e-05, "loss": 3.1633, "step": 72500 }, { "epoch": 0.06, "learning_rate": 4.986095582793939e-05, "loss": 3.1777, "step": 73000 }, { "epoch": 0.06, "learning_rate": 4.9860003170444146e-05, "loss": 3.1979, "step": 73500 }, { "epoch": 0.06, "learning_rate": 4.9859052418263894e-05, "loss": 3.2044, "step": 74000 }, { "epoch": 0.06, "learning_rate": 4.985809976076865e-05, "loss": 3.1902, "step": 74500 }, { "epoch": 0.06, "learning_rate": 4.985714710327341e-05, "loss": 3.1951, "step": 75000 }, { "epoch": 0.06, "learning_rate": 4.985619635109316e-05, "loss": 3.1676, "step": 75500 }, { "epoch": 0.06, "learning_rate": 4.985524369359792e-05, "loss": 3.1914, "step": 76000 }, { "epoch": 0.06, "learning_rate": 4.9854291036102675e-05, "loss": 3.1693, "step": 76500 }, { "epoch": 0.06, "learning_rate": 4.985333837860743e-05, "loss": 3.1765, "step": 77000 }, { "epoch": 0.06, "learning_rate": 4.9852385721112185e-05, "loss": 3.1891, "step": 77500 }, { "epoch": 0.06, "learning_rate": 4.985143306361695e-05, "loss": 3.183, "step": 78000 }, { "epoch": 0.06, "learning_rate": 4.98504804061217e-05, "loss": 3.1294, "step": 78500 }, { "epoch": 0.06, "learning_rate": 4.984952774862646e-05, "loss": 3.1808, "step": 79000 }, { "epoch": 0.06, "learning_rate": 4.98485789017612e-05, "loss": 3.1302, "step": 79500 }, { "epoch": 0.06, "learning_rate": 4.984762624426596e-05, "loss": 3.1417, "step": 80000 }, { "epoch": 0.06, "learning_rate": 4.9846675492085705e-05, "loss": 3.1261, "step": 80500 }, { "epoch": 0.06, "learning_rate": 4.984572283459046e-05, "loss": 3.1386, "step": 81000 }, { "epoch": 0.06, "learning_rate": 4.9844770177095215e-05, "loss": 3.1012, "step": 81500 }, { "epoch": 0.06, "learning_rate": 4.984381751959998e-05, "loss": 3.1128, "step": 82000 }, { "epoch": 0.06, "learning_rate": 4.984286486210474e-05, "loss": 3.1119, "step": 82500 }, { "epoch": 0.06, "learning_rate": 4.984191220460949e-05, "loss": 3.1521, "step": 83000 }, { "epoch": 0.06, "learning_rate": 4.9840959547114255e-05, "loss": 3.1592, "step": 83500 }, { "epoch": 0.06, "learning_rate": 4.9840006889619006e-05, "loss": 3.1215, "step": 84000 }, { "epoch": 0.06, "learning_rate": 4.9839054232123764e-05, "loss": 3.1312, "step": 84500 }, { "epoch": 0.06, "learning_rate": 4.983810157462852e-05, "loss": 3.1187, "step": 85000 }, { "epoch": 0.07, "learning_rate": 4.983715082244827e-05, "loss": 3.1045, "step": 85500 }, { "epoch": 0.07, "learning_rate": 4.983619816495303e-05, "loss": 3.1085, "step": 86000 }, { "epoch": 0.07, "learning_rate": 4.983524550745778e-05, "loss": 3.1197, "step": 86500 }, { "epoch": 0.07, "learning_rate": 4.9834292849962546e-05, "loss": 3.1308, "step": 87000 }, { "epoch": 0.07, "learning_rate": 4.9833342097782294e-05, "loss": 3.0803, "step": 87500 }, { "epoch": 0.07, "learning_rate": 4.9832389440287046e-05, "loss": 3.1056, "step": 88000 }, { "epoch": 0.07, "learning_rate": 4.9831436782791804e-05, "loss": 3.0895, "step": 88500 }, { "epoch": 0.07, "learning_rate": 4.983048412529657e-05, "loss": 3.0868, "step": 89000 }, { "epoch": 0.07, "learning_rate": 4.982953146780132e-05, "loss": 3.1079, "step": 89500 }, { "epoch": 0.07, "learning_rate": 4.982858071562107e-05, "loss": 3.0978, "step": 90000 }, { "epoch": 0.07, "eval_accuracy": 0.4609307256219239, "eval_loss": 3.0301129817962646, "eval_runtime": 9409.0489, "eval_samples_per_second": 29.226, "eval_steps_per_second": 7.306, "step": 90000 }, { "epoch": 0.07, "learning_rate": 4.982762996344082e-05, "loss": 3.1018, "step": 90500 }, { "epoch": 0.07, "learning_rate": 4.9826677305945575e-05, "loss": 3.1006, "step": 91000 }, { "epoch": 0.07, "learning_rate": 4.9825724648450334e-05, "loss": 3.0716, "step": 91500 }, { "epoch": 0.07, "learning_rate": 4.982477199095509e-05, "loss": 3.0794, "step": 92000 }, { "epoch": 0.07, "learning_rate": 4.982381933345985e-05, "loss": 3.1029, "step": 92500 }, { "epoch": 0.07, "learning_rate": 4.982286667596461e-05, "loss": 3.088, "step": 93000 }, { "epoch": 0.07, "learning_rate": 4.982191401846936e-05, "loss": 3.0811, "step": 93500 }, { "epoch": 0.07, "learning_rate": 4.9820961360974125e-05, "loss": 3.0673, "step": 94000 }, { "epoch": 0.07, "learning_rate": 4.982000870347888e-05, "loss": 3.045, "step": 94500 }, { "epoch": 0.07, "learning_rate": 4.9819057951298625e-05, "loss": 3.063, "step": 95000 }, { "epoch": 0.07, "learning_rate": 4.981810529380338e-05, "loss": 3.0866, "step": 95500 }, { "epoch": 0.07, "learning_rate": 4.981715263630814e-05, "loss": 3.067, "step": 96000 }, { "epoch": 0.07, "learning_rate": 4.98161999788129e-05, "loss": 3.057, "step": 96500 }, { "epoch": 0.07, "learning_rate": 4.981524732131766e-05, "loss": 3.0727, "step": 97000 }, { "epoch": 0.07, "learning_rate": 4.9814296569137406e-05, "loss": 3.0557, "step": 97500 }, { "epoch": 0.07, "learning_rate": 4.9813343911642165e-05, "loss": 3.0411, "step": 98000 }, { "epoch": 0.08, "learning_rate": 4.981239125414692e-05, "loss": 3.0588, "step": 98500 }, { "epoch": 0.08, "learning_rate": 4.9811438596651674e-05, "loss": 3.0624, "step": 99000 }, { "epoch": 0.08, "learning_rate": 4.981048593915644e-05, "loss": 3.0663, "step": 99500 }, { "epoch": 0.08, "learning_rate": 4.980953328166119e-05, "loss": 3.0606, "step": 100000 }, { "epoch": 0.08, "learning_rate": 4.980858062416595e-05, "loss": 3.0231, "step": 100500 }, { "epoch": 0.08, "learning_rate": 4.98076298719857e-05, "loss": 3.0685, "step": 101000 }, { "epoch": 0.08, "learning_rate": 4.9806677214490456e-05, "loss": 3.0324, "step": 101500 }, { "epoch": 0.08, "learning_rate": 4.9805724556995214e-05, "loss": 3.0673, "step": 102000 }, { "epoch": 0.08, "learning_rate": 4.9804771899499966e-05, "loss": 3.0388, "step": 102500 }, { "epoch": 0.08, "learning_rate": 4.980381924200473e-05, "loss": 3.0676, "step": 103000 }, { "epoch": 0.08, "learning_rate": 4.980286848982448e-05, "loss": 3.0401, "step": 103500 }, { "epoch": 0.08, "learning_rate": 4.980191583232923e-05, "loss": 3.0467, "step": 104000 }, { "epoch": 0.08, "learning_rate": 4.980096317483399e-05, "loss": 3.0953, "step": 104500 }, { "epoch": 0.08, "learning_rate": 4.9800010517338754e-05, "loss": 3.0276, "step": 105000 }, { "epoch": 0.08, "learning_rate": 4.9799057859843505e-05, "loss": 3.0124, "step": 105500 }, { "epoch": 0.08, "learning_rate": 4.9798105202348264e-05, "loss": 3.0765, "step": 106000 }, { "epoch": 0.08, "learning_rate": 4.979715445016802e-05, "loss": 3.0445, "step": 106500 }, { "epoch": 0.08, "learning_rate": 4.979620179267277e-05, "loss": 3.0133, "step": 107000 }, { "epoch": 0.08, "learning_rate": 4.979524913517753e-05, "loss": 3.0313, "step": 107500 }, { "epoch": 0.08, "learning_rate": 4.979429647768229e-05, "loss": 3.0353, "step": 108000 }, { "epoch": 0.08, "learning_rate": 4.9793343820187045e-05, "loss": 3.0559, "step": 108500 }, { "epoch": 0.08, "learning_rate": 4.9792393068006793e-05, "loss": 3.0513, "step": 109000 }, { "epoch": 0.08, "learning_rate": 4.9791440410511545e-05, "loss": 3.0325, "step": 109500 }, { "epoch": 0.08, "learning_rate": 4.979048775301631e-05, "loss": 2.9789, "step": 110000 }, { "epoch": 0.08, "learning_rate": 4.978953509552106e-05, "loss": 3.0123, "step": 110500 }, { "epoch": 0.08, "learning_rate": 4.978858243802582e-05, "loss": 3.0376, "step": 111000 }, { "epoch": 0.08, "learning_rate": 4.978763168584557e-05, "loss": 2.9944, "step": 111500 }, { "epoch": 0.09, "learning_rate": 4.9786679028350326e-05, "loss": 3.0379, "step": 112000 }, { "epoch": 0.09, "learning_rate": 4.9785726370855085e-05, "loss": 3.037, "step": 112500 }, { "epoch": 0.09, "learning_rate": 4.978477371335984e-05, "loss": 3.033, "step": 113000 }, { "epoch": 0.09, "learning_rate": 4.978382296117959e-05, "loss": 3.0064, "step": 113500 }, { "epoch": 0.09, "learning_rate": 4.978287030368435e-05, "loss": 3.0156, "step": 114000 }, { "epoch": 0.09, "learning_rate": 4.978191764618911e-05, "loss": 2.982, "step": 114500 }, { "epoch": 0.09, "learning_rate": 4.978096498869386e-05, "loss": 2.9832, "step": 115000 }, { "epoch": 0.09, "learning_rate": 4.9780012331198624e-05, "loss": 2.9951, "step": 115500 }, { "epoch": 0.09, "learning_rate": 4.9779059673703376e-05, "loss": 3.0045, "step": 116000 }, { "epoch": 0.09, "learning_rate": 4.9778107016208134e-05, "loss": 3.0056, "step": 116500 }, { "epoch": 0.09, "learning_rate": 4.977715626402788e-05, "loss": 3.0038, "step": 117000 }, { "epoch": 0.09, "learning_rate": 4.977620360653264e-05, "loss": 3.0, "step": 117500 }, { "epoch": 0.09, "learning_rate": 4.97752509490374e-05, "loss": 2.9905, "step": 118000 }, { "epoch": 0.09, "learning_rate": 4.977429829154216e-05, "loss": 2.9814, "step": 118500 }, { "epoch": 0.09, "learning_rate": 4.9773345634046916e-05, "loss": 3.015, "step": 119000 }, { "epoch": 0.09, "learning_rate": 4.9772392976551674e-05, "loss": 2.9816, "step": 119500 }, { "epoch": 0.09, "learning_rate": 4.9771440319056425e-05, "loss": 2.9834, "step": 120000 }, { "epoch": 0.09, "eval_accuracy": 0.4731336645831918, "eval_loss": 2.9267640113830566, "eval_runtime": 9410.96, "eval_samples_per_second": 29.22, "eval_steps_per_second": 7.305, "step": 120000 }, { "epoch": 0.09, "learning_rate": 4.977048766156119e-05, "loss": 3.017, "step": 120500 }, { "epoch": 0.09, "learning_rate": 4.976953690938094e-05, "loss": 2.9735, "step": 121000 }, { "epoch": 0.09, "learning_rate": 4.976858425188569e-05, "loss": 2.9824, "step": 121500 }, { "epoch": 0.09, "learning_rate": 4.976763349970544e-05, "loss": 3.014, "step": 122000 }, { "epoch": 0.09, "learning_rate": 4.9766680842210204e-05, "loss": 2.986, "step": 122500 }, { "epoch": 0.09, "learning_rate": 4.9765730090029945e-05, "loss": 2.9826, "step": 123000 }, { "epoch": 0.09, "learning_rate": 4.97647774325347e-05, "loss": 2.9521, "step": 123500 }, { "epoch": 0.09, "learning_rate": 4.976382477503946e-05, "loss": 2.9768, "step": 124000 }, { "epoch": 0.09, "learning_rate": 4.976287211754422e-05, "loss": 2.9776, "step": 124500 }, { "epoch": 0.1, "learning_rate": 4.976191946004898e-05, "loss": 2.9986, "step": 125000 }, { "epoch": 0.1, "learning_rate": 4.976096680255373e-05, "loss": 3.0055, "step": 125500 }, { "epoch": 0.1, "learning_rate": 4.9760014145058495e-05, "loss": 2.9619, "step": 126000 }, { "epoch": 0.1, "learning_rate": 4.9759061487563246e-05, "loss": 2.9778, "step": 126500 }, { "epoch": 0.1, "learning_rate": 4.9758108830068005e-05, "loss": 2.9979, "step": 127000 }, { "epoch": 0.1, "learning_rate": 4.975715617257277e-05, "loss": 2.9515, "step": 127500 }, { "epoch": 0.1, "learning_rate": 4.975620542039251e-05, "loss": 2.9911, "step": 128000 }, { "epoch": 0.1, "learning_rate": 4.975525276289727e-05, "loss": 2.999, "step": 128500 }, { "epoch": 0.1, "learning_rate": 4.975430010540203e-05, "loss": 2.9594, "step": 129000 }, { "epoch": 0.1, "learning_rate": 4.9753347447906786e-05, "loss": 2.9552, "step": 129500 }, { "epoch": 0.1, "learning_rate": 4.9752394790411544e-05, "loss": 2.9616, "step": 130000 }, { "epoch": 0.1, "learning_rate": 4.9751442132916296e-05, "loss": 2.9647, "step": 130500 }, { "epoch": 0.1, "learning_rate": 4.975048947542106e-05, "loss": 2.9816, "step": 131000 }, { "epoch": 0.1, "learning_rate": 4.974953681792582e-05, "loss": 2.9796, "step": 131500 }, { "epoch": 0.1, "learning_rate": 4.974858416043057e-05, "loss": 2.9673, "step": 132000 }, { "epoch": 0.1, "learning_rate": 4.974763340825032e-05, "loss": 2.956, "step": 132500 }, { "epoch": 0.1, "learning_rate": 4.9746680750755084e-05, "loss": 2.9462, "step": 133000 }, { "epoch": 0.1, "learning_rate": 4.9745728093259836e-05, "loss": 2.952, "step": 133500 }, { "epoch": 0.1, "learning_rate": 4.9744775435764594e-05, "loss": 3.0012, "step": 134000 }, { "epoch": 0.1, "learning_rate": 4.974382277826935e-05, "loss": 2.9776, "step": 134500 }, { "epoch": 0.1, "learning_rate": 4.974287012077411e-05, "loss": 2.9598, "step": 135000 }, { "epoch": 0.1, "learning_rate": 4.974191936859386e-05, "loss": 2.9493, "step": 135500 }, { "epoch": 0.1, "learning_rate": 4.974096671109861e-05, "loss": 2.9561, "step": 136000 }, { "epoch": 0.1, "learning_rate": 4.9740014053603375e-05, "loss": 2.9514, "step": 136500 }, { "epoch": 0.1, "learning_rate": 4.973906139610813e-05, "loss": 2.9681, "step": 137000 }, { "epoch": 0.1, "learning_rate": 4.9738112549242865e-05, "loss": 2.9495, "step": 137500 }, { "epoch": 0.11, "learning_rate": 4.973716179706261e-05, "loss": 2.9459, "step": 138000 }, { "epoch": 0.11, "learning_rate": 4.973620913956737e-05, "loss": 2.9566, "step": 138500 }, { "epoch": 0.11, "learning_rate": 4.973525648207213e-05, "loss": 2.9313, "step": 139000 }, { "epoch": 0.11, "learning_rate": 4.973430382457689e-05, "loss": 2.9254, "step": 139500 }, { "epoch": 0.11, "learning_rate": 4.9733351167081647e-05, "loss": 2.9166, "step": 140000 }, { "epoch": 0.11, "learning_rate": 4.9732398509586405e-05, "loss": 2.9289, "step": 140500 }, { "epoch": 0.11, "learning_rate": 4.973144585209116e-05, "loss": 2.9558, "step": 141000 }, { "epoch": 0.11, "learning_rate": 4.9730493194595915e-05, "loss": 2.9195, "step": 141500 }, { "epoch": 0.11, "learning_rate": 4.972954053710068e-05, "loss": 2.9223, "step": 142000 }, { "epoch": 0.11, "learning_rate": 4.972858978492043e-05, "loss": 2.9649, "step": 142500 }, { "epoch": 0.11, "learning_rate": 4.972763712742518e-05, "loss": 2.9333, "step": 143000 }, { "epoch": 0.11, "learning_rate": 4.972668446992994e-05, "loss": 2.9285, "step": 143500 }, { "epoch": 0.11, "learning_rate": 4.97257318124347e-05, "loss": 2.9219, "step": 144000 }, { "epoch": 0.11, "learning_rate": 4.9724779154939454e-05, "loss": 2.953, "step": 144500 }, { "epoch": 0.11, "learning_rate": 4.972382649744421e-05, "loss": 2.9132, "step": 145000 }, { "epoch": 0.11, "learning_rate": 4.972287383994897e-05, "loss": 2.9079, "step": 145500 }, { "epoch": 0.11, "learning_rate": 4.972192118245373e-05, "loss": 2.9099, "step": 146000 }, { "epoch": 0.11, "learning_rate": 4.972097043027348e-05, "loss": 2.9591, "step": 146500 }, { "epoch": 0.11, "learning_rate": 4.9720017772778236e-05, "loss": 2.9362, "step": 147000 }, { "epoch": 0.11, "learning_rate": 4.9719067020597984e-05, "loss": 2.8915, "step": 147500 }, { "epoch": 0.11, "learning_rate": 4.971811436310274e-05, "loss": 2.9103, "step": 148000 }, { "epoch": 0.11, "learning_rate": 4.9717161705607494e-05, "loss": 2.9463, "step": 148500 }, { "epoch": 0.11, "learning_rate": 4.971620904811226e-05, "loss": 2.9158, "step": 149000 }, { "epoch": 0.11, "learning_rate": 4.971525639061701e-05, "loss": 2.9283, "step": 149500 }, { "epoch": 0.11, "learning_rate": 4.971430563843676e-05, "loss": 2.9484, "step": 150000 }, { "epoch": 0.11, "eval_accuracy": 0.4822463949408844, "eval_loss": 2.8511881828308105, "eval_runtime": 9418.6976, "eval_samples_per_second": 29.196, "eval_steps_per_second": 7.299, "step": 150000 }, { "epoch": 0.11, "learning_rate": 4.971335298094152e-05, "loss": 2.9239, "step": 150500 }, { "epoch": 0.12, "learning_rate": 4.9712400323446275e-05, "loss": 2.9036, "step": 151000 }, { "epoch": 0.12, "learning_rate": 4.9711447665951034e-05, "loss": 2.915, "step": 151500 }, { "epoch": 0.12, "learning_rate": 4.971049500845579e-05, "loss": 2.9037, "step": 152000 }, { "epoch": 0.12, "learning_rate": 4.970954235096055e-05, "loss": 2.9162, "step": 152500 }, { "epoch": 0.12, "learning_rate": 4.970858969346531e-05, "loss": 2.9086, "step": 153000 }, { "epoch": 0.12, "learning_rate": 4.970763703597006e-05, "loss": 2.9218, "step": 153500 }, { "epoch": 0.12, "learning_rate": 4.970668628378981e-05, "loss": 2.904, "step": 154000 }, { "epoch": 0.12, "learning_rate": 4.9705735531609556e-05, "loss": 2.915, "step": 154500 }, { "epoch": 0.12, "learning_rate": 4.9704782874114315e-05, "loss": 2.9285, "step": 155000 }, { "epoch": 0.12, "learning_rate": 4.970383021661907e-05, "loss": 2.9272, "step": 155500 }, { "epoch": 0.12, "learning_rate": 4.970287755912383e-05, "loss": 2.9167, "step": 156000 }, { "epoch": 0.12, "learning_rate": 4.970192490162859e-05, "loss": 2.9075, "step": 156500 }, { "epoch": 0.12, "learning_rate": 4.970097224413335e-05, "loss": 2.9193, "step": 157000 }, { "epoch": 0.12, "learning_rate": 4.9700021491953096e-05, "loss": 2.9223, "step": 157500 }, { "epoch": 0.12, "learning_rate": 4.9699068834457854e-05, "loss": 2.9181, "step": 158000 }, { "epoch": 0.12, "learning_rate": 4.969811617696261e-05, "loss": 2.9265, "step": 158500 }, { "epoch": 0.12, "learning_rate": 4.9697163519467364e-05, "loss": 2.9026, "step": 159000 }, { "epoch": 0.12, "learning_rate": 4.969621086197212e-05, "loss": 2.8882, "step": 159500 }, { "epoch": 0.12, "learning_rate": 4.969525820447689e-05, "loss": 2.9127, "step": 160000 }, { "epoch": 0.12, "learning_rate": 4.969430554698164e-05, "loss": 2.9058, "step": 160500 }, { "epoch": 0.12, "learning_rate": 4.96933528894864e-05, "loss": 2.8779, "step": 161000 }, { "epoch": 0.12, "learning_rate": 4.9692400231991156e-05, "loss": 2.9242, "step": 161500 }, { "epoch": 0.12, "learning_rate": 4.9691447574495914e-05, "loss": 2.894, "step": 162000 }, { "epoch": 0.12, "learning_rate": 4.969049682231566e-05, "loss": 2.8995, "step": 162500 }, { "epoch": 0.12, "learning_rate": 4.968954416482042e-05, "loss": 2.904, "step": 163000 }, { "epoch": 0.12, "learning_rate": 4.968859150732518e-05, "loss": 2.8794, "step": 163500 }, { "epoch": 0.12, "learning_rate": 4.968763884982993e-05, "loss": 2.9044, "step": 164000 }, { "epoch": 0.13, "learning_rate": 4.968668619233469e-05, "loss": 2.8428, "step": 164500 }, { "epoch": 0.13, "learning_rate": 4.9685733534839454e-05, "loss": 2.8494, "step": 165000 }, { "epoch": 0.13, "learning_rate": 4.9684782782659195e-05, "loss": 2.9191, "step": 165500 }, { "epoch": 0.13, "learning_rate": 4.9683830125163954e-05, "loss": 2.8787, "step": 166000 }, { "epoch": 0.13, "learning_rate": 4.968287746766871e-05, "loss": 2.9042, "step": 166500 }, { "epoch": 0.13, "learning_rate": 4.968192481017347e-05, "loss": 2.9144, "step": 167000 }, { "epoch": 0.13, "learning_rate": 4.968097405799322e-05, "loss": 2.8736, "step": 167500 }, { "epoch": 0.13, "learning_rate": 4.968002140049798e-05, "loss": 2.876, "step": 168000 }, { "epoch": 0.13, "learning_rate": 4.9679068743002735e-05, "loss": 2.8678, "step": 168500 }, { "epoch": 0.13, "learning_rate": 4.967811799082248e-05, "loss": 2.8589, "step": 169000 }, { "epoch": 0.13, "learning_rate": 4.967716533332724e-05, "loss": 2.9275, "step": 169500 }, { "epoch": 0.13, "learning_rate": 4.967621267583199e-05, "loss": 2.8589, "step": 170000 }, { "epoch": 0.13, "learning_rate": 4.967526001833676e-05, "loss": 2.8833, "step": 170500 }, { "epoch": 0.13, "learning_rate": 4.967430736084151e-05, "loss": 2.8853, "step": 171000 }, { "epoch": 0.13, "learning_rate": 4.967335470334627e-05, "loss": 2.8735, "step": 171500 }, { "epoch": 0.13, "learning_rate": 4.9672402045851026e-05, "loss": 2.8955, "step": 172000 }, { "epoch": 0.13, "learning_rate": 4.9671451293670774e-05, "loss": 2.8867, "step": 172500 }, { "epoch": 0.13, "learning_rate": 4.967049863617553e-05, "loss": 2.8953, "step": 173000 }, { "epoch": 0.13, "learning_rate": 4.9669545978680284e-05, "loss": 2.8616, "step": 173500 }, { "epoch": 0.13, "learning_rate": 4.966859332118505e-05, "loss": 2.863, "step": 174000 }, { "epoch": 0.13, "learning_rate": 4.966764066368981e-05, "loss": 2.858, "step": 174500 }, { "epoch": 0.13, "learning_rate": 4.966668800619456e-05, "loss": 2.8498, "step": 175000 }, { "epoch": 0.13, "learning_rate": 4.9665735348699324e-05, "loss": 2.8584, "step": 175500 }, { "epoch": 0.13, "learning_rate": 4.9664782691204076e-05, "loss": 2.9082, "step": 176000 }, { "epoch": 0.13, "learning_rate": 4.9663830033708834e-05, "loss": 2.8263, "step": 176500 }, { "epoch": 0.13, "learning_rate": 4.966287737621359e-05, "loss": 2.8803, "step": 177000 }, { "epoch": 0.14, "learning_rate": 4.966192662403334e-05, "loss": 2.8448, "step": 177500 }, { "epoch": 0.14, "learning_rate": 4.96609739665381e-05, "loss": 2.8578, "step": 178000 }, { "epoch": 0.14, "learning_rate": 4.966002130904285e-05, "loss": 2.8743, "step": 178500 }, { "epoch": 0.14, "learning_rate": 4.9659068651547615e-05, "loss": 2.8525, "step": 179000 }, { "epoch": 0.14, "learning_rate": 4.9658115994052374e-05, "loss": 2.8555, "step": 179500 }, { "epoch": 0.14, "learning_rate": 4.9657165241872115e-05, "loss": 2.8477, "step": 180000 }, { "epoch": 0.14, "eval_accuracy": 0.4890978909293843, "eval_loss": 2.795215129852295, "eval_runtime": 9432.8544, "eval_samples_per_second": 29.152, "eval_steps_per_second": 7.288, "step": 180000 }, { "epoch": 0.14, "learning_rate": 4.9656212584376874e-05, "loss": 2.8582, "step": 180500 }, { "epoch": 0.14, "learning_rate": 4.965526183219663e-05, "loss": 2.8457, "step": 181000 }, { "epoch": 0.14, "learning_rate": 4.965430917470138e-05, "loss": 2.8208, "step": 181500 }, { "epoch": 0.14, "learning_rate": 4.965335651720614e-05, "loss": 2.8652, "step": 182000 }, { "epoch": 0.14, "learning_rate": 4.96524038597109e-05, "loss": 2.8593, "step": 182500 }, { "epoch": 0.14, "learning_rate": 4.9651451202215655e-05, "loss": 2.8295, "step": 183000 }, { "epoch": 0.14, "learning_rate": 4.965049854472041e-05, "loss": 2.86, "step": 183500 }, { "epoch": 0.14, "learning_rate": 4.964954588722517e-05, "loss": 2.8383, "step": 184000 }, { "epoch": 0.14, "learning_rate": 4.964859322972993e-05, "loss": 2.8651, "step": 184500 }, { "epoch": 0.14, "learning_rate": 4.964764438286467e-05, "loss": 2.8398, "step": 185000 }, { "epoch": 0.14, "learning_rate": 4.9646691725369426e-05, "loss": 2.8199, "step": 185500 }, { "epoch": 0.14, "learning_rate": 4.964573906787418e-05, "loss": 2.8475, "step": 186000 }, { "epoch": 0.14, "learning_rate": 4.964478641037894e-05, "loss": 2.8669, "step": 186500 }, { "epoch": 0.14, "learning_rate": 4.9643833752883694e-05, "loss": 2.7931, "step": 187000 }, { "epoch": 0.14, "learning_rate": 4.964288300070344e-05, "loss": 2.8364, "step": 187500 }, { "epoch": 0.14, "learning_rate": 4.96419303432082e-05, "loss": 2.8269, "step": 188000 }, { "epoch": 0.14, "learning_rate": 4.964097768571296e-05, "loss": 2.8357, "step": 188500 }, { "epoch": 0.14, "learning_rate": 4.964002502821772e-05, "loss": 2.836, "step": 189000 }, { "epoch": 0.14, "learning_rate": 4.9639072370722476e-05, "loss": 2.8448, "step": 189500 }, { "epoch": 0.14, "learning_rate": 4.9638119713227234e-05, "loss": 2.8249, "step": 190000 }, { "epoch": 0.15, "learning_rate": 4.963716705573199e-05, "loss": 2.8253, "step": 190500 }, { "epoch": 0.15, "learning_rate": 4.9636214398236744e-05, "loss": 2.8479, "step": 191000 }, { "epoch": 0.15, "learning_rate": 4.963526174074151e-05, "loss": 2.8745, "step": 191500 }, { "epoch": 0.15, "learning_rate": 4.963431098856126e-05, "loss": 2.8462, "step": 192000 }, { "epoch": 0.15, "learning_rate": 4.963335833106601e-05, "loss": 2.8358, "step": 192500 }, { "epoch": 0.15, "learning_rate": 4.963240757888576e-05, "loss": 2.8586, "step": 193000 }, { "epoch": 0.15, "learning_rate": 4.963145492139052e-05, "loss": 2.8598, "step": 193500 }, { "epoch": 0.15, "learning_rate": 4.9630502263895274e-05, "loss": 2.8436, "step": 194000 }, { "epoch": 0.15, "learning_rate": 4.962954960640003e-05, "loss": 2.8385, "step": 194500 }, { "epoch": 0.15, "learning_rate": 4.962859694890479e-05, "loss": 2.8045, "step": 195000 }, { "epoch": 0.15, "learning_rate": 4.962764429140955e-05, "loss": 2.8111, "step": 195500 }, { "epoch": 0.15, "learning_rate": 4.96266916339143e-05, "loss": 2.8257, "step": 196000 }, { "epoch": 0.15, "learning_rate": 4.962573897641906e-05, "loss": 2.87, "step": 196500 }, { "epoch": 0.15, "learning_rate": 4.9624788224238813e-05, "loss": 2.8326, "step": 197000 }, { "epoch": 0.15, "learning_rate": 4.9623835566743565e-05, "loss": 2.8528, "step": 197500 }, { "epoch": 0.15, "learning_rate": 4.962288290924832e-05, "loss": 2.8219, "step": 198000 }, { "epoch": 0.15, "learning_rate": 4.962193025175308e-05, "loss": 2.843, "step": 198500 }, { "epoch": 0.15, "learning_rate": 4.962097759425784e-05, "loss": 2.8355, "step": 199000 }, { "epoch": 0.15, "learning_rate": 4.962002684207759e-05, "loss": 2.851, "step": 199500 }, { "epoch": 0.15, "learning_rate": 4.9619074184582346e-05, "loss": 2.7945, "step": 200000 }, { "epoch": 0.15, "learning_rate": 4.9618121527087105e-05, "loss": 2.8149, "step": 200500 }, { "epoch": 0.15, "learning_rate": 4.961716886959186e-05, "loss": 2.8337, "step": 201000 }, { "epoch": 0.15, "learning_rate": 4.961621811741161e-05, "loss": 2.853, "step": 201500 }, { "epoch": 0.15, "learning_rate": 4.961526736523135e-05, "loss": 2.8083, "step": 202000 }, { "epoch": 0.15, "learning_rate": 4.961431470773612e-05, "loss": 2.8415, "step": 202500 }, { "epoch": 0.15, "learning_rate": 4.9613363955555866e-05, "loss": 2.8454, "step": 203000 }, { "epoch": 0.16, "learning_rate": 4.961241129806062e-05, "loss": 2.8115, "step": 203500 }, { "epoch": 0.16, "learning_rate": 4.9611458640565376e-05, "loss": 2.8149, "step": 204000 }, { "epoch": 0.16, "learning_rate": 4.961050598307014e-05, "loss": 2.817, "step": 204500 }, { "epoch": 0.16, "learning_rate": 4.960955332557489e-05, "loss": 2.8427, "step": 205000 }, { "epoch": 0.16, "learning_rate": 4.960860066807965e-05, "loss": 2.8041, "step": 205500 }, { "epoch": 0.16, "learning_rate": 4.960764801058441e-05, "loss": 2.8263, "step": 206000 }, { "epoch": 0.16, "learning_rate": 4.960669535308917e-05, "loss": 2.8128, "step": 206500 }, { "epoch": 0.16, "learning_rate": 4.960574269559392e-05, "loss": 2.8204, "step": 207000 }, { "epoch": 0.16, "learning_rate": 4.9604790038098684e-05, "loss": 2.7701, "step": 207500 }, { "epoch": 0.16, "learning_rate": 4.960383738060344e-05, "loss": 2.8026, "step": 208000 }, { "epoch": 0.16, "learning_rate": 4.9602884723108194e-05, "loss": 2.8336, "step": 208500 }, { "epoch": 0.16, "learning_rate": 4.960193206561295e-05, "loss": 2.8053, "step": 209000 }, { "epoch": 0.16, "learning_rate": 4.960097940811771e-05, "loss": 2.8329, "step": 209500 }, { "epoch": 0.16, "learning_rate": 4.960002675062247e-05, "loss": 2.8102, "step": 210000 }, { "epoch": 0.16, "eval_accuracy": 0.49527599043120424, "eval_loss": 2.747093677520752, "eval_runtime": 9430.05, "eval_samples_per_second": 29.161, "eval_steps_per_second": 7.29, "step": 210000 }, { "epoch": 0.16, "learning_rate": 4.959907599844222e-05, "loss": 2.7952, "step": 210500 }, { "epoch": 0.16, "learning_rate": 4.9598123340946975e-05, "loss": 2.8051, "step": 211000 }, { "epoch": 0.16, "learning_rate": 4.9597170683451733e-05, "loss": 2.832, "step": 211500 }, { "epoch": 0.16, "learning_rate": 4.9596218025956485e-05, "loss": 2.7877, "step": 212000 }, { "epoch": 0.16, "learning_rate": 4.959526727377623e-05, "loss": 2.8293, "step": 212500 }, { "epoch": 0.16, "learning_rate": 4.9594314616281e-05, "loss": 2.8296, "step": 213000 }, { "epoch": 0.16, "learning_rate": 4.959336195878575e-05, "loss": 2.8155, "step": 213500 }, { "epoch": 0.16, "learning_rate": 4.959240930129051e-05, "loss": 2.8317, "step": 214000 }, { "epoch": 0.16, "learning_rate": 4.9591458549110256e-05, "loss": 2.8036, "step": 214500 }, { "epoch": 0.16, "learning_rate": 4.9590505891615015e-05, "loss": 2.8087, "step": 215000 }, { "epoch": 0.16, "learning_rate": 4.958955323411977e-05, "loss": 2.8455, "step": 215500 }, { "epoch": 0.16, "learning_rate": 4.958860057662453e-05, "loss": 2.8149, "step": 216000 }, { "epoch": 0.17, "learning_rate": 4.958764791912929e-05, "loss": 2.7848, "step": 216500 }, { "epoch": 0.17, "learning_rate": 4.958669526163405e-05, "loss": 2.8028, "step": 217000 }, { "epoch": 0.17, "learning_rate": 4.9585744509453796e-05, "loss": 2.8419, "step": 217500 }, { "epoch": 0.17, "learning_rate": 4.958479185195855e-05, "loss": 2.8478, "step": 218000 }, { "epoch": 0.17, "learning_rate": 4.95838410997783e-05, "loss": 2.7895, "step": 218500 }, { "epoch": 0.17, "learning_rate": 4.958288844228306e-05, "loss": 2.7878, "step": 219000 }, { "epoch": 0.17, "learning_rate": 4.958193578478781e-05, "loss": 2.8194, "step": 219500 }, { "epoch": 0.17, "learning_rate": 4.958098312729257e-05, "loss": 2.7735, "step": 220000 }, { "epoch": 0.17, "learning_rate": 4.958003046979733e-05, "loss": 2.8111, "step": 220500 }, { "epoch": 0.17, "learning_rate": 4.957907971761708e-05, "loss": 2.8361, "step": 221000 }, { "epoch": 0.17, "learning_rate": 4.9578127060121836e-05, "loss": 2.7916, "step": 221500 }, { "epoch": 0.17, "learning_rate": 4.9577174402626594e-05, "loss": 2.7754, "step": 222000 }, { "epoch": 0.17, "learning_rate": 4.957622174513135e-05, "loss": 2.7829, "step": 222500 }, { "epoch": 0.17, "learning_rate": 4.9575269087636104e-05, "loss": 2.8057, "step": 223000 }, { "epoch": 0.17, "learning_rate": 4.957431643014087e-05, "loss": 2.8106, "step": 223500 }, { "epoch": 0.17, "learning_rate": 4.957336377264563e-05, "loss": 2.8135, "step": 224000 }, { "epoch": 0.17, "learning_rate": 4.957241111515038e-05, "loss": 2.7848, "step": 224500 }, { "epoch": 0.17, "learning_rate": 4.957146036297013e-05, "loss": 2.8022, "step": 225000 }, { "epoch": 0.17, "learning_rate": 4.957050770547489e-05, "loss": 2.7634, "step": 225500 }, { "epoch": 0.17, "learning_rate": 4.956955504797964e-05, "loss": 2.8107, "step": 226000 }, { "epoch": 0.17, "learning_rate": 4.95686023904844e-05, "loss": 2.7913, "step": 226500 }, { "epoch": 0.17, "learning_rate": 4.956765163830415e-05, "loss": 2.7764, "step": 227000 }, { "epoch": 0.17, "learning_rate": 4.956669898080891e-05, "loss": 2.7623, "step": 227500 }, { "epoch": 0.17, "learning_rate": 4.9565748228628657e-05, "loss": 2.7985, "step": 228000 }, { "epoch": 0.17, "learning_rate": 4.9564795571133415e-05, "loss": 2.7821, "step": 228500 }, { "epoch": 0.17, "learning_rate": 4.956384291363817e-05, "loss": 2.7838, "step": 229000 }, { "epoch": 0.17, "learning_rate": 4.956289025614293e-05, "loss": 2.7913, "step": 229500 }, { "epoch": 0.18, "learning_rate": 4.956193759864768e-05, "loss": 2.7662, "step": 230000 }, { "epoch": 0.18, "learning_rate": 4.956098494115244e-05, "loss": 2.804, "step": 230500 }, { "epoch": 0.18, "learning_rate": 4.95600322836572e-05, "loss": 2.796, "step": 231000 }, { "epoch": 0.18, "learning_rate": 4.955907962616196e-05, "loss": 2.8101, "step": 231500 }, { "epoch": 0.18, "learning_rate": 4.9558126968666716e-05, "loss": 2.8038, "step": 232000 }, { "epoch": 0.18, "learning_rate": 4.9557176216486464e-05, "loss": 2.7837, "step": 232500 }, { "epoch": 0.18, "learning_rate": 4.955622355899122e-05, "loss": 2.8052, "step": 233000 }, { "epoch": 0.18, "learning_rate": 4.955527090149598e-05, "loss": 2.7893, "step": 233500 }, { "epoch": 0.18, "learning_rate": 4.955431824400074e-05, "loss": 2.751, "step": 234000 }, { "epoch": 0.18, "learning_rate": 4.95533655865055e-05, "loss": 2.7621, "step": 234500 }, { "epoch": 0.18, "learning_rate": 4.955241292901025e-05, "loss": 2.7687, "step": 235000 }, { "epoch": 0.18, "learning_rate": 4.955146027151501e-05, "loss": 2.7471, "step": 235500 }, { "epoch": 0.18, "learning_rate": 4.955050761401977e-05, "loss": 2.7743, "step": 236000 }, { "epoch": 0.18, "learning_rate": 4.9549556861839514e-05, "loss": 2.8044, "step": 236500 }, { "epoch": 0.18, "learning_rate": 4.954860420434427e-05, "loss": 2.7905, "step": 237000 }, { "epoch": 0.18, "learning_rate": 4.954765154684903e-05, "loss": 2.7859, "step": 237500 }, { "epoch": 0.18, "learning_rate": 4.954670079466878e-05, "loss": 2.7853, "step": 238000 }, { "epoch": 0.18, "learning_rate": 4.954574813717354e-05, "loss": 2.7774, "step": 238500 }, { "epoch": 0.18, "learning_rate": 4.9544795479678295e-05, "loss": 2.7867, "step": 239000 }, { "epoch": 0.18, "learning_rate": 4.9543842822183054e-05, "loss": 2.7452, "step": 239500 }, { "epoch": 0.18, "learning_rate": 4.95428920700028e-05, "loss": 2.7967, "step": 240000 }, { "epoch": 0.18, "eval_accuracy": 0.4996462228988974, "eval_loss": 2.710792303085327, "eval_runtime": 9435.8846, "eval_samples_per_second": 29.143, "eval_steps_per_second": 7.286, "step": 240000 }, { "epoch": 0.18, "learning_rate": 4.954193941250755e-05, "loss": 2.7987, "step": 240500 }, { "epoch": 0.18, "learning_rate": 4.954098675501231e-05, "loss": 2.7928, "step": 241000 }, { "epoch": 0.18, "learning_rate": 4.954003600283206e-05, "loss": 2.8072, "step": 241500 }, { "epoch": 0.18, "learning_rate": 4.953908334533682e-05, "loss": 2.783, "step": 242000 }, { "epoch": 0.18, "learning_rate": 4.9538130687841576e-05, "loss": 2.7966, "step": 242500 }, { "epoch": 0.19, "learning_rate": 4.9537178030346335e-05, "loss": 2.7721, "step": 243000 }, { "epoch": 0.19, "learning_rate": 4.953622537285109e-05, "loss": 2.785, "step": 243500 }, { "epoch": 0.19, "learning_rate": 4.953527271535585e-05, "loss": 2.7477, "step": 244000 }, { "epoch": 0.19, "learning_rate": 4.95343200578606e-05, "loss": 2.7637, "step": 244500 }, { "epoch": 0.19, "learning_rate": 4.953336740036537e-05, "loss": 2.7794, "step": 245000 }, { "epoch": 0.19, "learning_rate": 4.9532416648185116e-05, "loss": 2.7604, "step": 245500 }, { "epoch": 0.19, "learning_rate": 4.953146399068987e-05, "loss": 2.7663, "step": 246000 }, { "epoch": 0.19, "learning_rate": 4.9530511333194626e-05, "loss": 2.7636, "step": 246500 }, { "epoch": 0.19, "learning_rate": 4.952956058101438e-05, "loss": 2.7356, "step": 247000 }, { "epoch": 0.19, "learning_rate": 4.952860792351913e-05, "loss": 2.7926, "step": 247500 }, { "epoch": 0.19, "learning_rate": 4.952765526602389e-05, "loss": 2.7601, "step": 248000 }, { "epoch": 0.19, "learning_rate": 4.952670260852865e-05, "loss": 2.7536, "step": 248500 }, { "epoch": 0.19, "learning_rate": 4.952574995103341e-05, "loss": 2.7557, "step": 249000 }, { "epoch": 0.19, "learning_rate": 4.9524797293538166e-05, "loss": 2.7436, "step": 249500 }, { "epoch": 0.19, "learning_rate": 4.9523844636042924e-05, "loss": 2.7933, "step": 250000 }, { "epoch": 0.19, "learning_rate": 4.952289197854768e-05, "loss": 2.7836, "step": 250500 }, { "epoch": 0.19, "learning_rate": 4.9521939321052434e-05, "loss": 2.7916, "step": 251000 }, { "epoch": 0.19, "learning_rate": 4.952098666355719e-05, "loss": 2.784, "step": 251500 }, { "epoch": 0.19, "learning_rate": 4.952003591137695e-05, "loss": 2.8058, "step": 252000 }, { "epoch": 0.19, "learning_rate": 4.95190832538817e-05, "loss": 2.7708, "step": 252500 }, { "epoch": 0.19, "learning_rate": 4.951813059638646e-05, "loss": 2.7649, "step": 253000 }, { "epoch": 0.19, "learning_rate": 4.9517177938891215e-05, "loss": 2.7618, "step": 253500 }, { "epoch": 0.19, "learning_rate": 4.9516227186710964e-05, "loss": 2.7666, "step": 254000 }, { "epoch": 0.19, "learning_rate": 4.951527452921572e-05, "loss": 2.7531, "step": 254500 }, { "epoch": 0.19, "learning_rate": 4.951432187172048e-05, "loss": 2.7349, "step": 255000 }, { "epoch": 0.19, "learning_rate": 4.951336921422524e-05, "loss": 2.7424, "step": 255500 }, { "epoch": 0.2, "learning_rate": 4.951241655673e-05, "loss": 2.7557, "step": 256000 }, { "epoch": 0.2, "learning_rate": 4.951146389923475e-05, "loss": 2.7773, "step": 256500 }, { "epoch": 0.2, "learning_rate": 4.951051124173951e-05, "loss": 2.7801, "step": 257000 }, { "epoch": 0.2, "learning_rate": 4.9509558584244265e-05, "loss": 2.7726, "step": 257500 }, { "epoch": 0.2, "learning_rate": 4.950860783206401e-05, "loss": 2.7549, "step": 258000 }, { "epoch": 0.2, "learning_rate": 4.950765517456877e-05, "loss": 2.751, "step": 258500 }, { "epoch": 0.2, "learning_rate": 4.950670251707353e-05, "loss": 2.7645, "step": 259000 }, { "epoch": 0.2, "learning_rate": 4.950574985957829e-05, "loss": 2.7528, "step": 259500 }, { "epoch": 0.2, "learning_rate": 4.9504797202083046e-05, "loss": 2.7187, "step": 260000 }, { "epoch": 0.2, "learning_rate": 4.950384644990279e-05, "loss": 2.7688, "step": 260500 }, { "epoch": 0.2, "learning_rate": 4.950289379240755e-05, "loss": 2.7486, "step": 261000 }, { "epoch": 0.2, "learning_rate": 4.950194113491231e-05, "loss": 2.7852, "step": 261500 }, { "epoch": 0.2, "learning_rate": 4.950098847741706e-05, "loss": 2.78, "step": 262000 }, { "epoch": 0.2, "learning_rate": 4.950003581992183e-05, "loss": 2.7747, "step": 262500 }, { "epoch": 0.2, "learning_rate": 4.949908316242658e-05, "loss": 2.7546, "step": 263000 }, { "epoch": 0.2, "learning_rate": 4.949813241024633e-05, "loss": 2.7687, "step": 263500 }, { "epoch": 0.2, "learning_rate": 4.9497179752751086e-05, "loss": 2.7588, "step": 264000 }, { "epoch": 0.2, "learning_rate": 4.9496227095255844e-05, "loss": 2.7483, "step": 264500 }, { "epoch": 0.2, "learning_rate": 4.94952744377606e-05, "loss": 2.7599, "step": 265000 }, { "epoch": 0.2, "learning_rate": 4.949432368558035e-05, "loss": 2.73, "step": 265500 }, { "epoch": 0.2, "learning_rate": 4.949337102808511e-05, "loss": 2.7373, "step": 266000 }, { "epoch": 0.2, "learning_rate": 4.949241837058987e-05, "loss": 2.7502, "step": 266500 }, { "epoch": 0.2, "learning_rate": 4.949146571309462e-05, "loss": 2.7494, "step": 267000 }, { "epoch": 0.2, "learning_rate": 4.949051305559938e-05, "loss": 2.7689, "step": 267500 }, { "epoch": 0.2, "learning_rate": 4.948956230341913e-05, "loss": 2.7502, "step": 268000 }, { "epoch": 0.2, "learning_rate": 4.9488609645923884e-05, "loss": 2.7764, "step": 268500 }, { "epoch": 0.21, "learning_rate": 4.948765698842864e-05, "loss": 2.7502, "step": 269000 }, { "epoch": 0.21, "learning_rate": 4.94867043309334e-05, "loss": 2.7163, "step": 269500 }, { "epoch": 0.21, "learning_rate": 4.948575167343816e-05, "loss": 2.7153, "step": 270000 }, { "epoch": 0.21, "eval_accuracy": 0.5035932644127394, "eval_loss": 2.677302837371826, "eval_runtime": 9440.3567, "eval_samples_per_second": 29.129, "eval_steps_per_second": 7.282, "step": 270000 }, { "epoch": 0.21, "learning_rate": 4.948480092125791e-05, "loss": 2.7439, "step": 270500 }, { "epoch": 0.21, "learning_rate": 4.9483848263762665e-05, "loss": 2.7588, "step": 271000 }, { "epoch": 0.21, "learning_rate": 4.948289560626742e-05, "loss": 2.7715, "step": 271500 }, { "epoch": 0.21, "learning_rate": 4.948194294877218e-05, "loss": 2.7281, "step": 272000 }, { "epoch": 0.21, "learning_rate": 4.948099219659193e-05, "loss": 2.7371, "step": 272500 }, { "epoch": 0.21, "learning_rate": 4.948003953909668e-05, "loss": 2.7099, "step": 273000 }, { "epoch": 0.21, "learning_rate": 4.9479086881601446e-05, "loss": 2.7504, "step": 273500 }, { "epoch": 0.21, "learning_rate": 4.9478136129421195e-05, "loss": 2.7276, "step": 274000 }, { "epoch": 0.21, "learning_rate": 4.9477183471925946e-05, "loss": 2.7398, "step": 274500 }, { "epoch": 0.21, "learning_rate": 4.9476230814430704e-05, "loss": 2.7525, "step": 275000 }, { "epoch": 0.21, "learning_rate": 4.947527815693546e-05, "loss": 2.7582, "step": 275500 }, { "epoch": 0.21, "learning_rate": 4.947432549944022e-05, "loss": 2.7391, "step": 276000 }, { "epoch": 0.21, "learning_rate": 4.947337284194498e-05, "loss": 2.7516, "step": 276500 }, { "epoch": 0.21, "learning_rate": 4.947242018444974e-05, "loss": 2.7765, "step": 277000 }, { "epoch": 0.21, "learning_rate": 4.9471467526954496e-05, "loss": 2.7253, "step": 277500 }, { "epoch": 0.21, "learning_rate": 4.947051486945925e-05, "loss": 2.764, "step": 278000 }, { "epoch": 0.21, "learning_rate": 4.946956221196401e-05, "loss": 2.7455, "step": 278500 }, { "epoch": 0.21, "learning_rate": 4.9468609554468764e-05, "loss": 2.7432, "step": 279000 }, { "epoch": 0.21, "learning_rate": 4.946765689697352e-05, "loss": 2.7165, "step": 279500 }, { "epoch": 0.21, "learning_rate": 4.946670614479327e-05, "loss": 2.7309, "step": 280000 }, { "epoch": 0.21, "learning_rate": 4.946575348729803e-05, "loss": 2.7511, "step": 280500 }, { "epoch": 0.21, "learning_rate": 4.946480082980279e-05, "loss": 2.7209, "step": 281000 }, { "epoch": 0.21, "learning_rate": 4.946384817230754e-05, "loss": 2.7342, "step": 281500 }, { "epoch": 0.21, "learning_rate": 4.9462895514812304e-05, "loss": 2.7181, "step": 282000 }, { "epoch": 0.22, "learning_rate": 4.946194476263205e-05, "loss": 2.7304, "step": 282500 }, { "epoch": 0.22, "learning_rate": 4.9460992105136804e-05, "loss": 2.7433, "step": 283000 }, { "epoch": 0.22, "learning_rate": 4.946003944764156e-05, "loss": 2.7141, "step": 283500 }, { "epoch": 0.22, "learning_rate": 4.945908679014633e-05, "loss": 2.7364, "step": 284000 }, { "epoch": 0.22, "learning_rate": 4.945813413265108e-05, "loss": 2.7202, "step": 284500 }, { "epoch": 0.22, "learning_rate": 4.945718147515584e-05, "loss": 2.7397, "step": 285000 }, { "epoch": 0.22, "learning_rate": 4.9456228817660595e-05, "loss": 2.7349, "step": 285500 }, { "epoch": 0.22, "learning_rate": 4.945527616016535e-05, "loss": 2.7261, "step": 286000 }, { "epoch": 0.22, "learning_rate": 4.94543254079851e-05, "loss": 2.7468, "step": 286500 }, { "epoch": 0.22, "learning_rate": 4.945337275048986e-05, "loss": 2.741, "step": 287000 }, { "epoch": 0.22, "learning_rate": 4.945242009299462e-05, "loss": 2.7476, "step": 287500 }, { "epoch": 0.22, "learning_rate": 4.945146743549937e-05, "loss": 2.764, "step": 288000 }, { "epoch": 0.22, "learning_rate": 4.945051668331912e-05, "loss": 2.7357, "step": 288500 }, { "epoch": 0.22, "learning_rate": 4.944956402582388e-05, "loss": 2.7472, "step": 289000 }, { "epoch": 0.22, "learning_rate": 4.9448611368328634e-05, "loss": 2.7483, "step": 289500 }, { "epoch": 0.22, "learning_rate": 4.944765871083339e-05, "loss": 2.7366, "step": 290000 }, { "epoch": 0.22, "learning_rate": 4.944670605333815e-05, "loss": 2.7329, "step": 290500 }, { "epoch": 0.22, "learning_rate": 4.944575339584291e-05, "loss": 2.6905, "step": 291000 }, { "epoch": 0.22, "learning_rate": 4.944480073834767e-05, "loss": 2.71, "step": 291500 }, { "epoch": 0.22, "learning_rate": 4.9443848080852426e-05, "loss": 2.7157, "step": 292000 }, { "epoch": 0.22, "learning_rate": 4.9442895423357184e-05, "loss": 2.7285, "step": 292500 }, { "epoch": 0.22, "learning_rate": 4.944194467117693e-05, "loss": 2.7128, "step": 293000 }, { "epoch": 0.22, "learning_rate": 4.944099391899668e-05, "loss": 2.7213, "step": 293500 }, { "epoch": 0.22, "learning_rate": 4.944004126150143e-05, "loss": 2.7189, "step": 294000 }, { "epoch": 0.22, "learning_rate": 4.94390886040062e-05, "loss": 2.7038, "step": 294500 }, { "epoch": 0.22, "learning_rate": 4.943813594651095e-05, "loss": 2.7175, "step": 295000 }, { "epoch": 0.23, "learning_rate": 4.943718328901571e-05, "loss": 2.7151, "step": 295500 }, { "epoch": 0.23, "learning_rate": 4.943623063152047e-05, "loss": 2.7271, "step": 296000 }, { "epoch": 0.23, "learning_rate": 4.9435277974025224e-05, "loss": 2.7508, "step": 296500 }, { "epoch": 0.23, "learning_rate": 4.943432531652998e-05, "loss": 2.6948, "step": 297000 }, { "epoch": 0.23, "learning_rate": 4.943337265903474e-05, "loss": 2.7273, "step": 297500 }, { "epoch": 0.23, "learning_rate": 4.94324200015395e-05, "loss": 2.728, "step": 298000 }, { "epoch": 0.23, "learning_rate": 4.943147115467424e-05, "loss": 2.76, "step": 298500 }, { "epoch": 0.23, "learning_rate": 4.943051849717899e-05, "loss": 2.7177, "step": 299000 }, { "epoch": 0.23, "learning_rate": 4.9429565839683753e-05, "loss": 2.7213, "step": 299500 }, { "epoch": 0.23, "learning_rate": 4.942861318218851e-05, "loss": 2.7472, "step": 300000 }, { "epoch": 0.23, "eval_accuracy": 0.5076805733110031, "eval_loss": 2.6502606868743896, "eval_runtime": 9429.9896, "eval_samples_per_second": 29.161, "eval_steps_per_second": 7.29, "step": 300000 }, { "epoch": 0.23, "learning_rate": 4.942766243000825e-05, "loss": 2.7177, "step": 300500 }, { "epoch": 0.23, "learning_rate": 4.942670977251301e-05, "loss": 2.6843, "step": 301000 }, { "epoch": 0.23, "learning_rate": 4.9425757115017777e-05, "loss": 2.7062, "step": 301500 }, { "epoch": 0.23, "learning_rate": 4.942480445752253e-05, "loss": 2.7284, "step": 302000 }, { "epoch": 0.23, "learning_rate": 4.9423851800027286e-05, "loss": 2.7322, "step": 302500 }, { "epoch": 0.23, "learning_rate": 4.9422901047847035e-05, "loss": 2.7356, "step": 303000 }, { "epoch": 0.23, "learning_rate": 4.942194839035179e-05, "loss": 2.7371, "step": 303500 }, { "epoch": 0.23, "learning_rate": 4.942099573285655e-05, "loss": 2.7274, "step": 304000 }, { "epoch": 0.23, "learning_rate": 4.94200430753613e-05, "loss": 2.7091, "step": 304500 }, { "epoch": 0.23, "learning_rate": 4.941909041786607e-05, "loss": 2.7325, "step": 305000 }, { "epoch": 0.23, "learning_rate": 4.9418137760370826e-05, "loss": 2.6878, "step": 305500 }, { "epoch": 0.23, "learning_rate": 4.941718510287558e-05, "loss": 2.7247, "step": 306000 }, { "epoch": 0.23, "learning_rate": 4.941623244538034e-05, "loss": 2.7304, "step": 306500 }, { "epoch": 0.23, "learning_rate": 4.9415279787885094e-05, "loss": 2.7058, "step": 307000 }, { "epoch": 0.23, "learning_rate": 4.941433094101983e-05, "loss": 2.7205, "step": 307500 }, { "epoch": 0.23, "learning_rate": 4.941337828352459e-05, "loss": 2.6802, "step": 308000 }, { "epoch": 0.24, "learning_rate": 4.941242562602935e-05, "loss": 2.7081, "step": 308500 }, { "epoch": 0.24, "learning_rate": 4.941147296853411e-05, "loss": 2.7157, "step": 309000 }, { "epoch": 0.24, "learning_rate": 4.9410520311038866e-05, "loss": 2.7099, "step": 309500 }, { "epoch": 0.24, "learning_rate": 4.940956765354362e-05, "loss": 2.7206, "step": 310000 }, { "epoch": 0.24, "learning_rate": 4.940861690136337e-05, "loss": 2.7085, "step": 310500 }, { "epoch": 0.24, "learning_rate": 4.940766424386813e-05, "loss": 2.7348, "step": 311000 }, { "epoch": 0.24, "learning_rate": 4.940671158637288e-05, "loss": 2.7232, "step": 311500 }, { "epoch": 0.24, "learning_rate": 4.940575892887764e-05, "loss": 2.7226, "step": 312000 }, { "epoch": 0.24, "learning_rate": 4.94048062713824e-05, "loss": 2.6962, "step": 312500 }, { "epoch": 0.24, "learning_rate": 4.940385551920215e-05, "loss": 2.7015, "step": 313000 }, { "epoch": 0.24, "learning_rate": 4.9402902861706905e-05, "loss": 2.7018, "step": 313500 }, { "epoch": 0.24, "learning_rate": 4.9401950204211663e-05, "loss": 2.672, "step": 314000 }, { "epoch": 0.24, "learning_rate": 4.940099754671642e-05, "loss": 2.7106, "step": 314500 }, { "epoch": 0.24, "learning_rate": 4.940004488922118e-05, "loss": 2.7232, "step": 315000 }, { "epoch": 0.24, "learning_rate": 4.939909223172594e-05, "loss": 2.7032, "step": 315500 }, { "epoch": 0.24, "learning_rate": 4.9398139574230697e-05, "loss": 2.6919, "step": 316000 }, { "epoch": 0.24, "learning_rate": 4.939718691673545e-05, "loss": 2.6878, "step": 316500 }, { "epoch": 0.24, "learning_rate": 4.9396234259240206e-05, "loss": 2.6981, "step": 317000 }, { "epoch": 0.24, "learning_rate": 4.939528350705996e-05, "loss": 2.7027, "step": 317500 }, { "epoch": 0.24, "learning_rate": 4.939433084956471e-05, "loss": 2.7368, "step": 318000 }, { "epoch": 0.24, "learning_rate": 4.939337819206947e-05, "loss": 2.6475, "step": 318500 }, { "epoch": 0.24, "learning_rate": 4.939242553457423e-05, "loss": 2.7051, "step": 319000 }, { "epoch": 0.24, "learning_rate": 4.939147478239398e-05, "loss": 2.7129, "step": 319500 }, { "epoch": 0.24, "learning_rate": 4.9390524030213726e-05, "loss": 2.7081, "step": 320000 }, { "epoch": 0.24, "learning_rate": 4.9389571372718484e-05, "loss": 2.6798, "step": 320500 }, { "epoch": 0.24, "learning_rate": 4.938861871522324e-05, "loss": 2.6816, "step": 321000 }, { "epoch": 0.25, "learning_rate": 4.9387666057728e-05, "loss": 2.7074, "step": 321500 }, { "epoch": 0.25, "learning_rate": 4.938671530554775e-05, "loss": 2.6997, "step": 322000 }, { "epoch": 0.25, "learning_rate": 4.93857626480525e-05, "loss": 2.6977, "step": 322500 }, { "epoch": 0.25, "learning_rate": 4.938481189587225e-05, "loss": 2.6978, "step": 323000 }, { "epoch": 0.25, "learning_rate": 4.9383859238377014e-05, "loss": 2.6974, "step": 323500 }, { "epoch": 0.25, "learning_rate": 4.9382906580881766e-05, "loss": 2.718, "step": 324000 }, { "epoch": 0.25, "learning_rate": 4.9381953923386524e-05, "loss": 2.6742, "step": 324500 }, { "epoch": 0.25, "learning_rate": 4.938100126589128e-05, "loss": 2.6692, "step": 325000 }, { "epoch": 0.25, "learning_rate": 4.938004860839604e-05, "loss": 2.7049, "step": 325500 }, { "epoch": 0.25, "learning_rate": 4.937909595090079e-05, "loss": 2.7122, "step": 326000 }, { "epoch": 0.25, "learning_rate": 4.937814329340556e-05, "loss": 2.7056, "step": 326500 }, { "epoch": 0.25, "learning_rate": 4.9377190635910315e-05, "loss": 2.6765, "step": 327000 }, { "epoch": 0.25, "learning_rate": 4.937623797841507e-05, "loss": 2.6906, "step": 327500 }, { "epoch": 0.25, "learning_rate": 4.937528532091983e-05, "loss": 2.6812, "step": 328000 }, { "epoch": 0.25, "learning_rate": 4.937433266342458e-05, "loss": 2.6699, "step": 328500 }, { "epoch": 0.25, "learning_rate": 4.937338191124433e-05, "loss": 2.6788, "step": 329000 }, { "epoch": 0.25, "learning_rate": 4.937242925374909e-05, "loss": 2.6995, "step": 329500 }, { "epoch": 0.25, "learning_rate": 4.937147659625385e-05, "loss": 2.6633, "step": 330000 }, { "epoch": 0.25, "eval_accuracy": 0.5107118901596502, "eval_loss": 2.6264419555664062, "eval_runtime": 9409.4565, "eval_samples_per_second": 29.225, "eval_steps_per_second": 7.306, "step": 330000 }, { "epoch": 0.25, "learning_rate": 4.9370523938758607e-05, "loss": 2.677, "step": 330500 }, { "epoch": 0.25, "learning_rate": 4.9369571281263365e-05, "loss": 2.6793, "step": 331000 }, { "epoch": 0.25, "learning_rate": 4.936861862376812e-05, "loss": 2.6978, "step": 331500 }, { "epoch": 0.25, "learning_rate": 4.936766596627288e-05, "loss": 2.7031, "step": 332000 }, { "epoch": 0.25, "learning_rate": 4.936671330877763e-05, "loss": 2.7086, "step": 332500 }, { "epoch": 0.25, "learning_rate": 4.936576065128239e-05, "loss": 2.6589, "step": 333000 }, { "epoch": 0.25, "learning_rate": 4.936480799378715e-05, "loss": 2.6933, "step": 333500 }, { "epoch": 0.25, "learning_rate": 4.936385533629191e-05, "loss": 2.6995, "step": 334000 }, { "epoch": 0.25, "learning_rate": 4.9362902678796666e-05, "loss": 2.6829, "step": 334500 }, { "epoch": 0.26, "learning_rate": 4.9361950021301424e-05, "loss": 2.6641, "step": 335000 }, { "epoch": 0.26, "learning_rate": 4.936099736380618e-05, "loss": 2.6928, "step": 335500 }, { "epoch": 0.26, "learning_rate": 4.936004661162593e-05, "loss": 2.6569, "step": 336000 }, { "epoch": 0.26, "learning_rate": 4.935909395413069e-05, "loss": 2.6996, "step": 336500 }, { "epoch": 0.26, "learning_rate": 4.935814129663545e-05, "loss": 2.6834, "step": 337000 }, { "epoch": 0.26, "learning_rate": 4.9357192449770186e-05, "loss": 2.6921, "step": 337500 }, { "epoch": 0.26, "learning_rate": 4.935623979227494e-05, "loss": 2.726, "step": 338000 }, { "epoch": 0.26, "learning_rate": 4.9355287134779696e-05, "loss": 2.6999, "step": 338500 }, { "epoch": 0.26, "learning_rate": 4.935433447728446e-05, "loss": 2.6603, "step": 339000 }, { "epoch": 0.26, "learning_rate": 4.935338181978921e-05, "loss": 2.7023, "step": 339500 }, { "epoch": 0.26, "learning_rate": 4.935242916229397e-05, "loss": 2.6995, "step": 340000 }, { "epoch": 0.26, "learning_rate": 4.935147650479873e-05, "loss": 2.7078, "step": 340500 }, { "epoch": 0.26, "learning_rate": 4.935052384730349e-05, "loss": 2.6793, "step": 341000 }, { "epoch": 0.26, "learning_rate": 4.934957118980824e-05, "loss": 2.6941, "step": 341500 }, { "epoch": 0.26, "learning_rate": 4.9348618532313004e-05, "loss": 2.6902, "step": 342000 }, { "epoch": 0.26, "learning_rate": 4.934766587481776e-05, "loss": 2.6861, "step": 342500 }, { "epoch": 0.26, "learning_rate": 4.9346713217322513e-05, "loss": 2.696, "step": 343000 }, { "epoch": 0.26, "learning_rate": 4.934576055982728e-05, "loss": 2.7084, "step": 343500 }, { "epoch": 0.26, "learning_rate": 4.934480980764703e-05, "loss": 2.7097, "step": 344000 }, { "epoch": 0.26, "learning_rate": 4.934385715015178e-05, "loss": 2.6744, "step": 344500 }, { "epoch": 0.26, "learning_rate": 4.9342906397971527e-05, "loss": 2.6656, "step": 345000 }, { "epoch": 0.26, "learning_rate": 4.9341955645791275e-05, "loss": 2.6596, "step": 345500 }, { "epoch": 0.26, "learning_rate": 4.934100298829603e-05, "loss": 2.6957, "step": 346000 }, { "epoch": 0.26, "learning_rate": 4.934005033080079e-05, "loss": 2.6929, "step": 346500 }, { "epoch": 0.26, "learning_rate": 4.933909767330555e-05, "loss": 2.6929, "step": 347000 }, { "epoch": 0.26, "learning_rate": 4.933814501581031e-05, "loss": 2.6926, "step": 347500 }, { "epoch": 0.27, "learning_rate": 4.9337192358315066e-05, "loss": 2.6827, "step": 348000 }, { "epoch": 0.27, "learning_rate": 4.933623970081982e-05, "loss": 2.696, "step": 348500 }, { "epoch": 0.27, "learning_rate": 4.933528704332458e-05, "loss": 2.656, "step": 349000 }, { "epoch": 0.27, "learning_rate": 4.9334334385829334e-05, "loss": 2.6867, "step": 349500 }, { "epoch": 0.27, "learning_rate": 4.933338363364908e-05, "loss": 2.6826, "step": 350000 }, { "epoch": 0.27, "learning_rate": 4.933243097615384e-05, "loss": 2.6991, "step": 350500 }, { "epoch": 0.27, "learning_rate": 4.93314783186586e-05, "loss": 2.6862, "step": 351000 }, { "epoch": 0.27, "learning_rate": 4.933052566116336e-05, "loss": 2.7032, "step": 351500 }, { "epoch": 0.27, "learning_rate": 4.9329574908983106e-05, "loss": 2.6707, "step": 352000 }, { "epoch": 0.27, "learning_rate": 4.9328624156802854e-05, "loss": 2.6744, "step": 352500 }, { "epoch": 0.27, "learning_rate": 4.932767149930761e-05, "loss": 2.6812, "step": 353000 }, { "epoch": 0.27, "learning_rate": 4.932671884181237e-05, "loss": 2.668, "step": 353500 }, { "epoch": 0.27, "learning_rate": 4.932576618431712e-05, "loss": 2.6646, "step": 354000 }, { "epoch": 0.27, "learning_rate": 4.932481352682188e-05, "loss": 2.6964, "step": 354500 }, { "epoch": 0.27, "learning_rate": 4.9323860869326645e-05, "loss": 2.6879, "step": 355000 }, { "epoch": 0.27, "learning_rate": 4.93229082118314e-05, "loss": 2.6811, "step": 355500 }, { "epoch": 0.27, "learning_rate": 4.9321955554336155e-05, "loss": 2.6522, "step": 356000 }, { "epoch": 0.27, "learning_rate": 4.9321002896840914e-05, "loss": 2.6448, "step": 356500 }, { "epoch": 0.27, "learning_rate": 4.932005023934567e-05, "loss": 2.6908, "step": 357000 }, { "epoch": 0.27, "learning_rate": 4.931909948716542e-05, "loss": 2.6609, "step": 357500 }, { "epoch": 0.27, "learning_rate": 4.931814682967018e-05, "loss": 2.6928, "step": 358000 }, { "epoch": 0.27, "learning_rate": 4.931719417217494e-05, "loss": 2.6738, "step": 358500 }, { "epoch": 0.27, "learning_rate": 4.931624151467969e-05, "loss": 2.6662, "step": 359000 }, { "epoch": 0.27, "learning_rate": 4.9315288857184447e-05, "loss": 2.6862, "step": 359500 }, { "epoch": 0.27, "learning_rate": 4.931433619968921e-05, "loss": 2.6845, "step": 360000 }, { "epoch": 0.27, "eval_accuracy": 0.5137789572575466, "eval_loss": 2.605391025543213, "eval_runtime": 9408.3309, "eval_samples_per_second": 29.228, "eval_steps_per_second": 7.307, "step": 360000 }, { "epoch": 0.27, "learning_rate": 4.931338354219396e-05, "loss": 2.6737, "step": 360500 }, { "epoch": 0.28, "learning_rate": 4.931243088469872e-05, "loss": 2.6591, "step": 361000 }, { "epoch": 0.28, "learning_rate": 4.931147822720348e-05, "loss": 2.6797, "step": 361500 }, { "epoch": 0.28, "learning_rate": 4.931052747502323e-05, "loss": 2.689, "step": 362000 }, { "epoch": 0.28, "learning_rate": 4.9309574817527986e-05, "loss": 2.6877, "step": 362500 }, { "epoch": 0.28, "learning_rate": 4.9308622160032745e-05, "loss": 2.677, "step": 363000 }, { "epoch": 0.28, "learning_rate": 4.93076695025375e-05, "loss": 2.6857, "step": 363500 }, { "epoch": 0.28, "learning_rate": 4.930671875035725e-05, "loss": 2.6652, "step": 364000 }, { "epoch": 0.28, "learning_rate": 4.9305767998177e-05, "loss": 2.6717, "step": 364500 }, { "epoch": 0.28, "learning_rate": 4.930481534068175e-05, "loss": 2.675, "step": 365000 }, { "epoch": 0.28, "learning_rate": 4.9303862683186516e-05, "loss": 2.6487, "step": 365500 }, { "epoch": 0.28, "learning_rate": 4.930291002569127e-05, "loss": 2.6674, "step": 366000 }, { "epoch": 0.28, "learning_rate": 4.9301959273511016e-05, "loss": 2.6587, "step": 366500 }, { "epoch": 0.28, "learning_rate": 4.9301006616015774e-05, "loss": 2.6535, "step": 367000 }, { "epoch": 0.28, "learning_rate": 4.930005395852053e-05, "loss": 2.6561, "step": 367500 }, { "epoch": 0.28, "learning_rate": 4.929910130102529e-05, "loss": 2.7063, "step": 368000 }, { "epoch": 0.28, "learning_rate": 4.929814864353004e-05, "loss": 2.6599, "step": 368500 }, { "epoch": 0.28, "learning_rate": 4.929719598603481e-05, "loss": 2.6673, "step": 369000 }, { "epoch": 0.28, "learning_rate": 4.9296245233854555e-05, "loss": 2.6625, "step": 369500 }, { "epoch": 0.28, "learning_rate": 4.929529257635931e-05, "loss": 2.6827, "step": 370000 }, { "epoch": 0.28, "learning_rate": 4.929433991886407e-05, "loss": 2.6537, "step": 370500 }, { "epoch": 0.28, "learning_rate": 4.929338726136883e-05, "loss": 2.6647, "step": 371000 }, { "epoch": 0.28, "learning_rate": 4.929243650918857e-05, "loss": 2.6479, "step": 371500 }, { "epoch": 0.28, "learning_rate": 4.929148575700832e-05, "loss": 2.6511, "step": 372000 }, { "epoch": 0.28, "learning_rate": 4.929053309951308e-05, "loss": 2.6551, "step": 372500 }, { "epoch": 0.28, "learning_rate": 4.928958044201784e-05, "loss": 2.6516, "step": 373000 }, { "epoch": 0.28, "learning_rate": 4.9288627784522595e-05, "loss": 2.6736, "step": 373500 }, { "epoch": 0.29, "learning_rate": 4.928767512702735e-05, "loss": 2.6321, "step": 374000 }, { "epoch": 0.29, "learning_rate": 4.928672246953211e-05, "loss": 2.6526, "step": 374500 }, { "epoch": 0.29, "learning_rate": 4.928576981203687e-05, "loss": 2.6663, "step": 375000 }, { "epoch": 0.29, "learning_rate": 4.928481715454162e-05, "loss": 2.6844, "step": 375500 }, { "epoch": 0.29, "learning_rate": 4.9283864497046386e-05, "loss": 2.6356, "step": 376000 }, { "epoch": 0.29, "learning_rate": 4.928291183955114e-05, "loss": 2.6523, "step": 376500 }, { "epoch": 0.29, "learning_rate": 4.9281961087370886e-05, "loss": 2.6419, "step": 377000 }, { "epoch": 0.29, "learning_rate": 4.9281008429875644e-05, "loss": 2.6547, "step": 377500 }, { "epoch": 0.29, "learning_rate": 4.92800557723804e-05, "loss": 2.6595, "step": 378000 }, { "epoch": 0.29, "learning_rate": 4.927910311488516e-05, "loss": 2.6661, "step": 378500 }, { "epoch": 0.29, "learning_rate": 4.927815045738992e-05, "loss": 2.6265, "step": 379000 }, { "epoch": 0.29, "learning_rate": 4.927719779989468e-05, "loss": 2.6637, "step": 379500 }, { "epoch": 0.29, "learning_rate": 4.9276245142399436e-05, "loss": 2.6869, "step": 380000 }, { "epoch": 0.29, "learning_rate": 4.927529248490419e-05, "loss": 2.6441, "step": 380500 }, { "epoch": 0.29, "learning_rate": 4.927433982740895e-05, "loss": 2.6789, "step": 381000 }, { "epoch": 0.29, "learning_rate": 4.92733890752287e-05, "loss": 2.6469, "step": 381500 }, { "epoch": 0.29, "learning_rate": 4.927243641773345e-05, "loss": 2.66, "step": 382000 }, { "epoch": 0.29, "learning_rate": 4.927148376023821e-05, "loss": 2.6563, "step": 382500 }, { "epoch": 0.29, "learning_rate": 4.9270531102742976e-05, "loss": 2.6577, "step": 383000 }, { "epoch": 0.29, "learning_rate": 4.926957844524773e-05, "loss": 2.6431, "step": 383500 }, { "epoch": 0.29, "learning_rate": 4.9268627693067475e-05, "loss": 2.6332, "step": 384000 }, { "epoch": 0.29, "learning_rate": 4.9267676940887224e-05, "loss": 2.6769, "step": 384500 }, { "epoch": 0.29, "learning_rate": 4.926672618870697e-05, "loss": 2.6478, "step": 385000 }, { "epoch": 0.29, "learning_rate": 4.926577353121173e-05, "loss": 2.6427, "step": 385500 }, { "epoch": 0.29, "learning_rate": 4.926482087371649e-05, "loss": 2.679, "step": 386000 }, { "epoch": 0.29, "learning_rate": 4.926386821622124e-05, "loss": 2.6152, "step": 386500 }, { "epoch": 0.29, "learning_rate": 4.9262915558726005e-05, "loss": 2.6526, "step": 387000 }, { "epoch": 0.3, "learning_rate": 4.926196290123076e-05, "loss": 2.6558, "step": 387500 }, { "epoch": 0.3, "learning_rate": 4.9261010243735515e-05, "loss": 2.614, "step": 388000 }, { "epoch": 0.3, "learning_rate": 4.926005758624028e-05, "loss": 2.6355, "step": 388500 }, { "epoch": 0.3, "learning_rate": 4.925910492874503e-05, "loss": 2.6684, "step": 389000 }, { "epoch": 0.3, "learning_rate": 4.925815227124979e-05, "loss": 2.6417, "step": 389500 }, { "epoch": 0.3, "learning_rate": 4.925720151906954e-05, "loss": 2.6661, "step": 390000 }, { "epoch": 0.3, "eval_accuracy": 0.5160672469262357, "eval_loss": 2.585765838623047, "eval_runtime": 9418.0501, "eval_samples_per_second": 29.198, "eval_steps_per_second": 7.299, "step": 390000 }, { "epoch": 0.3, "learning_rate": 4.9256248861574296e-05, "loss": 2.648, "step": 390500 }, { "epoch": 0.3, "learning_rate": 4.9255296204079055e-05, "loss": 2.6587, "step": 391000 }, { "epoch": 0.3, "learning_rate": 4.9254343546583806e-05, "loss": 2.6226, "step": 391500 }, { "epoch": 0.3, "learning_rate": 4.9253392794403554e-05, "loss": 2.6778, "step": 392000 }, { "epoch": 0.3, "learning_rate": 4.925244013690832e-05, "loss": 2.6372, "step": 392500 }, { "epoch": 0.3, "learning_rate": 4.925148747941307e-05, "loss": 2.6381, "step": 393000 }, { "epoch": 0.3, "learning_rate": 4.925053482191783e-05, "loss": 2.6418, "step": 393500 }, { "epoch": 0.3, "learning_rate": 4.9249584069737584e-05, "loss": 2.6367, "step": 394000 }, { "epoch": 0.3, "learning_rate": 4.9248631412242336e-05, "loss": 2.636, "step": 394500 }, { "epoch": 0.3, "learning_rate": 4.9247678754747094e-05, "loss": 2.6826, "step": 395000 }, { "epoch": 0.3, "learning_rate": 4.924672800256684e-05, "loss": 2.6592, "step": 395500 }, { "epoch": 0.3, "learning_rate": 4.92457753450716e-05, "loss": 2.6342, "step": 396000 }, { "epoch": 0.3, "learning_rate": 4.924482268757636e-05, "loss": 2.6693, "step": 396500 }, { "epoch": 0.3, "learning_rate": 4.924387003008111e-05, "loss": 2.6693, "step": 397000 }, { "epoch": 0.3, "learning_rate": 4.9242917372585876e-05, "loss": 2.622, "step": 397500 }, { "epoch": 0.3, "learning_rate": 4.9241964715090634e-05, "loss": 2.6396, "step": 398000 }, { "epoch": 0.3, "learning_rate": 4.9241012057595385e-05, "loss": 2.6291, "step": 398500 }, { "epoch": 0.3, "learning_rate": 4.9240059400100144e-05, "loss": 2.589, "step": 399000 }, { "epoch": 0.3, "learning_rate": 4.92391067426049e-05, "loss": 2.64, "step": 399500 }, { "epoch": 0.3, "learning_rate": 4.923815599042465e-05, "loss": 2.6394, "step": 400000 }, { "epoch": 0.31, "learning_rate": 4.923720333292941e-05, "loss": 2.6606, "step": 400500 }, { "epoch": 0.31, "learning_rate": 4.923625258074916e-05, "loss": 2.6635, "step": 401000 }, { "epoch": 0.31, "learning_rate": 4.9235299923253915e-05, "loss": 2.6722, "step": 401500 }, { "epoch": 0.31, "learning_rate": 4.9234347265758673e-05, "loss": 2.6528, "step": 402000 }, { "epoch": 0.31, "learning_rate": 4.9233394608263425e-05, "loss": 2.6387, "step": 402500 }, { "epoch": 0.31, "learning_rate": 4.923244385608318e-05, "loss": 2.6654, "step": 403000 }, { "epoch": 0.31, "learning_rate": 4.923149310390293e-05, "loss": 2.6465, "step": 403500 }, { "epoch": 0.31, "learning_rate": 4.9230540446407687e-05, "loss": 2.6379, "step": 404000 }, { "epoch": 0.31, "learning_rate": 4.922958778891244e-05, "loss": 2.6584, "step": 404500 }, { "epoch": 0.31, "learning_rate": 4.92286351314172e-05, "loss": 2.6359, "step": 405000 }, { "epoch": 0.31, "learning_rate": 4.9227682473921955e-05, "loss": 2.6, "step": 405500 }, { "epoch": 0.31, "learning_rate": 4.922672981642671e-05, "loss": 2.6522, "step": 406000 }, { "epoch": 0.31, "learning_rate": 4.922577715893147e-05, "loss": 2.6389, "step": 406500 }, { "epoch": 0.31, "learning_rate": 4.922482450143623e-05, "loss": 2.6525, "step": 407000 }, { "epoch": 0.31, "learning_rate": 4.922387184394099e-05, "loss": 2.6236, "step": 407500 }, { "epoch": 0.31, "learning_rate": 4.9222919186445746e-05, "loss": 2.6291, "step": 408000 }, { "epoch": 0.31, "learning_rate": 4.9221966528950504e-05, "loss": 2.6178, "step": 408500 }, { "epoch": 0.31, "learning_rate": 4.9221013871455256e-05, "loss": 2.6508, "step": 409000 }, { "epoch": 0.31, "learning_rate": 4.9220061213960014e-05, "loss": 2.6516, "step": 409500 }, { "epoch": 0.31, "learning_rate": 4.921910855646478e-05, "loss": 2.6492, "step": 410000 }, { "epoch": 0.31, "learning_rate": 4.921815780428452e-05, "loss": 2.6301, "step": 410500 }, { "epoch": 0.31, "learning_rate": 4.921720514678928e-05, "loss": 2.6367, "step": 411000 }, { "epoch": 0.31, "learning_rate": 4.921625248929404e-05, "loss": 2.6251, "step": 411500 }, { "epoch": 0.31, "learning_rate": 4.9215299831798796e-05, "loss": 2.6083, "step": 412000 }, { "epoch": 0.31, "learning_rate": 4.9214347174303554e-05, "loss": 2.639, "step": 412500 }, { "epoch": 0.31, "learning_rate": 4.921339451680831e-05, "loss": 2.6304, "step": 413000 }, { "epoch": 0.32, "learning_rate": 4.921244376462806e-05, "loss": 2.6551, "step": 413500 }, { "epoch": 0.32, "learning_rate": 4.921149110713282e-05, "loss": 2.6515, "step": 414000 }, { "epoch": 0.32, "learning_rate": 4.921053844963757e-05, "loss": 2.665, "step": 414500 }, { "epoch": 0.32, "learning_rate": 4.9209585792142335e-05, "loss": 2.6419, "step": 415000 }, { "epoch": 0.32, "learning_rate": 4.920863313464709e-05, "loss": 2.6537, "step": 415500 }, { "epoch": 0.32, "learning_rate": 4.9207680477151845e-05, "loss": 2.6293, "step": 416000 }, { "epoch": 0.32, "learning_rate": 4.9206727819656603e-05, "loss": 2.6426, "step": 416500 }, { "epoch": 0.32, "learning_rate": 4.920577706747635e-05, "loss": 2.638, "step": 417000 }, { "epoch": 0.32, "learning_rate": 4.92048263152961e-05, "loss": 2.6342, "step": 417500 }, { "epoch": 0.32, "learning_rate": 4.920387365780086e-05, "loss": 2.6245, "step": 418000 }, { "epoch": 0.32, "learning_rate": 4.920292100030561e-05, "loss": 2.6507, "step": 418500 }, { "epoch": 0.32, "learning_rate": 4.9201970248125365e-05, "loss": 2.639, "step": 419000 }, { "epoch": 0.32, "learning_rate": 4.920101759063012e-05, "loss": 2.5975, "step": 419500 }, { "epoch": 0.32, "learning_rate": 4.9200064933134875e-05, "loss": 2.6178, "step": 420000 }, { "epoch": 0.32, "eval_accuracy": 0.5182928021414346, "eval_loss": 2.5693817138671875, "eval_runtime": 9415.3679, "eval_samples_per_second": 29.206, "eval_steps_per_second": 7.302, "step": 420000 }, { "epoch": 0.32, "learning_rate": 4.919911227563963e-05, "loss": 2.662, "step": 420500 }, { "epoch": 0.32, "learning_rate": 4.919815961814439e-05, "loss": 2.6163, "step": 421000 }, { "epoch": 0.32, "learning_rate": 4.919720696064915e-05, "loss": 2.6503, "step": 421500 }, { "epoch": 0.32, "learning_rate": 4.919625430315391e-05, "loss": 2.6227, "step": 422000 }, { "epoch": 0.32, "learning_rate": 4.9195301645658666e-05, "loss": 2.6163, "step": 422500 }, { "epoch": 0.32, "learning_rate": 4.9194348988163424e-05, "loss": 2.6179, "step": 423000 }, { "epoch": 0.32, "learning_rate": 4.9193396330668176e-05, "loss": 2.6437, "step": 423500 }, { "epoch": 0.32, "learning_rate": 4.919244367317294e-05, "loss": 2.6422, "step": 424000 }, { "epoch": 0.32, "learning_rate": 4.91914910156777e-05, "loss": 2.6444, "step": 424500 }, { "epoch": 0.32, "learning_rate": 4.919053835818245e-05, "loss": 2.63, "step": 425000 }, { "epoch": 0.32, "learning_rate": 4.918958951131719e-05, "loss": 2.6384, "step": 425500 }, { "epoch": 0.32, "learning_rate": 4.9188636853821954e-05, "loss": 2.6253, "step": 426000 }, { "epoch": 0.33, "learning_rate": 4.9187684196326706e-05, "loss": 2.6414, "step": 426500 }, { "epoch": 0.33, "learning_rate": 4.9186731538831464e-05, "loss": 2.6029, "step": 427000 }, { "epoch": 0.33, "learning_rate": 4.918577888133622e-05, "loss": 2.61, "step": 427500 }, { "epoch": 0.33, "learning_rate": 4.918482622384098e-05, "loss": 2.6562, "step": 428000 }, { "epoch": 0.33, "learning_rate": 4.918387356634574e-05, "loss": 2.6401, "step": 428500 }, { "epoch": 0.33, "learning_rate": 4.91829209088505e-05, "loss": 2.6153, "step": 429000 }, { "epoch": 0.33, "learning_rate": 4.9181968251355255e-05, "loss": 2.6088, "step": 429500 }, { "epoch": 0.33, "learning_rate": 4.918101559386001e-05, "loss": 2.6281, "step": 430000 }, { "epoch": 0.33, "learning_rate": 4.9180062936364765e-05, "loss": 2.6414, "step": 430500 }, { "epoch": 0.33, "learning_rate": 4.917911027886953e-05, "loss": 2.6408, "step": 431000 }, { "epoch": 0.33, "learning_rate": 4.917815952668927e-05, "loss": 2.6294, "step": 431500 }, { "epoch": 0.33, "learning_rate": 4.917720686919403e-05, "loss": 2.6377, "step": 432000 }, { "epoch": 0.33, "learning_rate": 4.917625421169879e-05, "loss": 2.6446, "step": 432500 }, { "epoch": 0.33, "learning_rate": 4.9175303459518537e-05, "loss": 2.6278, "step": 433000 }, { "epoch": 0.33, "learning_rate": 4.9174350802023295e-05, "loss": 2.6108, "step": 433500 }, { "epoch": 0.33, "learning_rate": 4.917339814452805e-05, "loss": 2.6756, "step": 434000 }, { "epoch": 0.33, "learning_rate": 4.917244548703281e-05, "loss": 2.6358, "step": 434500 }, { "epoch": 0.33, "learning_rate": 4.917149282953757e-05, "loss": 2.6384, "step": 435000 }, { "epoch": 0.33, "learning_rate": 4.917054017204232e-05, "loss": 2.6002, "step": 435500 }, { "epoch": 0.33, "learning_rate": 4.9169587514547086e-05, "loss": 2.6335, "step": 436000 }, { "epoch": 0.33, "learning_rate": 4.9168636762366835e-05, "loss": 2.6145, "step": 436500 }, { "epoch": 0.33, "learning_rate": 4.9167684104871586e-05, "loss": 2.6213, "step": 437000 }, { "epoch": 0.33, "learning_rate": 4.9166733352691334e-05, "loss": 2.6218, "step": 437500 }, { "epoch": 0.33, "learning_rate": 4.916578069519609e-05, "loss": 2.6064, "step": 438000 }, { "epoch": 0.33, "learning_rate": 4.916482803770085e-05, "loss": 2.612, "step": 438500 }, { "epoch": 0.33, "learning_rate": 4.916387538020561e-05, "loss": 2.6512, "step": 439000 }, { "epoch": 0.33, "learning_rate": 4.916292272271036e-05, "loss": 2.6226, "step": 439500 }, { "epoch": 0.34, "learning_rate": 4.9161970065215126e-05, "loss": 2.6488, "step": 440000 }, { "epoch": 0.34, "learning_rate": 4.9161017407719884e-05, "loss": 2.6272, "step": 440500 }, { "epoch": 0.34, "learning_rate": 4.9160064750224636e-05, "loss": 2.6011, "step": 441000 }, { "epoch": 0.34, "learning_rate": 4.91591120927294e-05, "loss": 2.6259, "step": 441500 }, { "epoch": 0.34, "learning_rate": 4.915815943523415e-05, "loss": 2.6375, "step": 442000 }, { "epoch": 0.34, "learning_rate": 4.915720677773891e-05, "loss": 2.638, "step": 442500 }, { "epoch": 0.34, "learning_rate": 4.915625412024367e-05, "loss": 2.6498, "step": 443000 }, { "epoch": 0.34, "learning_rate": 4.915530146274843e-05, "loss": 2.5921, "step": 443500 }, { "epoch": 0.34, "learning_rate": 4.9154352615883165e-05, "loss": 2.6385, "step": 444000 }, { "epoch": 0.34, "learning_rate": 4.9153399958387924e-05, "loss": 2.6273, "step": 444500 }, { "epoch": 0.34, "learning_rate": 4.915244730089268e-05, "loss": 2.6267, "step": 445000 }, { "epoch": 0.34, "learning_rate": 4.915149464339744e-05, "loss": 2.6169, "step": 445500 }, { "epoch": 0.34, "learning_rate": 4.915054198590219e-05, "loss": 2.6201, "step": 446000 }, { "epoch": 0.34, "learning_rate": 4.914958932840695e-05, "loss": 2.6552, "step": 446500 }, { "epoch": 0.34, "learning_rate": 4.9148636670911715e-05, "loss": 2.6472, "step": 447000 }, { "epoch": 0.34, "learning_rate": 4.9147685918731457e-05, "loss": 2.6187, "step": 447500 }, { "epoch": 0.34, "learning_rate": 4.9146733261236215e-05, "loss": 2.6297, "step": 448000 }, { "epoch": 0.34, "learning_rate": 4.914578060374097e-05, "loss": 2.6266, "step": 448500 }, { "epoch": 0.34, "learning_rate": 4.914482794624573e-05, "loss": 2.6185, "step": 449000 }, { "epoch": 0.34, "learning_rate": 4.914387719406548e-05, "loss": 2.6239, "step": 449500 }, { "epoch": 0.34, "learning_rate": 4.914292453657024e-05, "loss": 2.6243, "step": 450000 }, { "epoch": 0.34, "eval_accuracy": 0.5204107724611845, "eval_loss": 2.5534162521362305, "eval_runtime": 9415.6775, "eval_samples_per_second": 29.205, "eval_steps_per_second": 7.301, "step": 450000 }, { "epoch": 0.34, "learning_rate": 4.9141971879074996e-05, "loss": 2.6308, "step": 450500 }, { "epoch": 0.34, "learning_rate": 4.9141019221579755e-05, "loss": 2.6045, "step": 451000 }, { "epoch": 0.34, "learning_rate": 4.9140066564084506e-05, "loss": 2.5877, "step": 451500 }, { "epoch": 0.34, "learning_rate": 4.913911390658927e-05, "loss": 2.629, "step": 452000 }, { "epoch": 0.34, "learning_rate": 4.913816124909402e-05, "loss": 2.6263, "step": 452500 }, { "epoch": 0.35, "learning_rate": 4.913721049691377e-05, "loss": 2.6047, "step": 453000 }, { "epoch": 0.35, "learning_rate": 4.913625974473352e-05, "loss": 2.5974, "step": 453500 }, { "epoch": 0.35, "learning_rate": 4.913530708723828e-05, "loss": 2.6318, "step": 454000 }, { "epoch": 0.35, "learning_rate": 4.9134354429743036e-05, "loss": 2.6453, "step": 454500 }, { "epoch": 0.35, "learning_rate": 4.9133401772247794e-05, "loss": 2.6236, "step": 455000 }, { "epoch": 0.35, "learning_rate": 4.913244911475255e-05, "loss": 2.6533, "step": 455500 }, { "epoch": 0.35, "learning_rate": 4.913150026788729e-05, "loss": 2.6339, "step": 456000 }, { "epoch": 0.35, "learning_rate": 4.913054761039205e-05, "loss": 2.6059, "step": 456500 }, { "epoch": 0.35, "learning_rate": 4.912959495289681e-05, "loss": 2.6119, "step": 457000 }, { "epoch": 0.35, "learning_rate": 4.912864229540156e-05, "loss": 2.6488, "step": 457500 }, { "epoch": 0.35, "learning_rate": 4.9127689637906324e-05, "loss": 2.6094, "step": 458000 }, { "epoch": 0.35, "learning_rate": 4.9126736980411075e-05, "loss": 2.6583, "step": 458500 }, { "epoch": 0.35, "learning_rate": 4.9125784322915834e-05, "loss": 2.6252, "step": 459000 }, { "epoch": 0.35, "learning_rate": 4.91248316654206e-05, "loss": 2.6173, "step": 459500 }, { "epoch": 0.35, "learning_rate": 4.912387900792535e-05, "loss": 2.624, "step": 460000 }, { "epoch": 0.35, "learning_rate": 4.912292635043011e-05, "loss": 2.6011, "step": 460500 }, { "epoch": 0.35, "learning_rate": 4.912197369293487e-05, "loss": 2.5893, "step": 461000 }, { "epoch": 0.35, "learning_rate": 4.9121021035439625e-05, "loss": 2.6199, "step": 461500 }, { "epoch": 0.35, "learning_rate": 4.9120068377944376e-05, "loss": 2.6008, "step": 462000 }, { "epoch": 0.35, "learning_rate": 4.9119115720449135e-05, "loss": 2.6024, "step": 462500 }, { "epoch": 0.35, "learning_rate": 4.91181630629539e-05, "loss": 2.6559, "step": 463000 }, { "epoch": 0.35, "learning_rate": 4.911721231077364e-05, "loss": 2.6475, "step": 463500 }, { "epoch": 0.35, "learning_rate": 4.91162596532784e-05, "loss": 2.6234, "step": 464000 }, { "epoch": 0.35, "learning_rate": 4.911530699578316e-05, "loss": 2.6034, "step": 464500 }, { "epoch": 0.35, "learning_rate": 4.9114356243602906e-05, "loss": 2.6108, "step": 465000 }, { "epoch": 0.35, "learning_rate": 4.9113403586107664e-05, "loss": 2.6313, "step": 465500 }, { "epoch": 0.36, "learning_rate": 4.911245092861242e-05, "loss": 2.6126, "step": 466000 }, { "epoch": 0.36, "learning_rate": 4.911149827111718e-05, "loss": 2.5916, "step": 466500 }, { "epoch": 0.36, "learning_rate": 4.911054561362194e-05, "loss": 2.6233, "step": 467000 }, { "epoch": 0.36, "learning_rate": 4.910959295612669e-05, "loss": 2.6083, "step": 467500 }, { "epoch": 0.36, "learning_rate": 4.9108640298631456e-05, "loss": 2.5778, "step": 468000 }, { "epoch": 0.36, "learning_rate": 4.9107687641136214e-05, "loss": 2.6258, "step": 468500 }, { "epoch": 0.36, "learning_rate": 4.9106734983640966e-05, "loss": 2.595, "step": 469000 }, { "epoch": 0.36, "learning_rate": 4.9105784231460714e-05, "loss": 2.6095, "step": 469500 }, { "epoch": 0.36, "learning_rate": 4.910483157396547e-05, "loss": 2.6012, "step": 470000 }, { "epoch": 0.36, "learning_rate": 4.910387891647023e-05, "loss": 2.6161, "step": 470500 }, { "epoch": 0.36, "learning_rate": 4.910292625897499e-05, "loss": 2.5923, "step": 471000 }, { "epoch": 0.36, "learning_rate": 4.910197360147975e-05, "loss": 2.6197, "step": 471500 }, { "epoch": 0.36, "learning_rate": 4.9101020943984505e-05, "loss": 2.61, "step": 472000 }, { "epoch": 0.36, "learning_rate": 4.910006828648926e-05, "loss": 2.5844, "step": 472500 }, { "epoch": 0.36, "learning_rate": 4.909911562899402e-05, "loss": 2.6276, "step": 473000 }, { "epoch": 0.36, "learning_rate": 4.909816297149878e-05, "loss": 2.6134, "step": 473500 }, { "epoch": 0.36, "learning_rate": 4.909721221931852e-05, "loss": 2.5853, "step": 474000 }, { "epoch": 0.36, "learning_rate": 4.909626146713827e-05, "loss": 2.638, "step": 474500 }, { "epoch": 0.36, "learning_rate": 4.909530880964303e-05, "loss": 2.5848, "step": 475000 }, { "epoch": 0.36, "learning_rate": 4.909435615214779e-05, "loss": 2.6422, "step": 475500 }, { "epoch": 0.36, "learning_rate": 4.9093405399967535e-05, "loss": 2.6272, "step": 476000 }, { "epoch": 0.36, "learning_rate": 4.909245274247229e-05, "loss": 2.6191, "step": 476500 }, { "epoch": 0.36, "learning_rate": 4.909150008497705e-05, "loss": 2.639, "step": 477000 }, { "epoch": 0.36, "learning_rate": 4.909054742748181e-05, "loss": 2.5862, "step": 477500 }, { "epoch": 0.36, "learning_rate": 4.908959476998657e-05, "loss": 2.6111, "step": 478000 }, { "epoch": 0.36, "learning_rate": 4.9088642112491326e-05, "loss": 2.6244, "step": 478500 }, { "epoch": 0.37, "learning_rate": 4.9087689454996085e-05, "loss": 2.6131, "step": 479000 }, { "epoch": 0.37, "learning_rate": 4.9086736797500836e-05, "loss": 2.6034, "step": 479500 }, { "epoch": 0.37, "learning_rate": 4.9085784140005595e-05, "loss": 2.6093, "step": 480000 }, { "epoch": 0.37, "eval_accuracy": 0.5221639565795111, "eval_loss": 2.540274143218994, "eval_runtime": 9413.8665, "eval_samples_per_second": 29.211, "eval_steps_per_second": 7.303, "step": 480000 }, { "epoch": 0.37, "learning_rate": 4.908483148251035e-05, "loss": 2.5819, "step": 480500 }, { "epoch": 0.37, "learning_rate": 4.908388263564509e-05, "loss": 2.622, "step": 481000 }, { "epoch": 0.37, "learning_rate": 4.908292997814985e-05, "loss": 2.6024, "step": 481500 }, { "epoch": 0.37, "learning_rate": 4.908197732065461e-05, "loss": 2.6016, "step": 482000 }, { "epoch": 0.37, "learning_rate": 4.9081024663159366e-05, "loss": 2.6061, "step": 482500 }, { "epoch": 0.37, "learning_rate": 4.9080072005664124e-05, "loss": 2.6218, "step": 483000 }, { "epoch": 0.37, "learning_rate": 4.9079119348168876e-05, "loss": 2.6072, "step": 483500 }, { "epoch": 0.37, "learning_rate": 4.907816669067364e-05, "loss": 2.5975, "step": 484000 }, { "epoch": 0.37, "learning_rate": 4.90772140331784e-05, "loss": 2.6154, "step": 484500 }, { "epoch": 0.37, "learning_rate": 4.907626137568315e-05, "loss": 2.6064, "step": 485000 }, { "epoch": 0.37, "learning_rate": 4.9075308718187916e-05, "loss": 2.5867, "step": 485500 }, { "epoch": 0.37, "learning_rate": 4.9074357966007664e-05, "loss": 2.6123, "step": 486000 }, { "epoch": 0.37, "learning_rate": 4.9073405308512415e-05, "loss": 2.5768, "step": 486500 }, { "epoch": 0.37, "learning_rate": 4.9072452651017174e-05, "loss": 2.5982, "step": 487000 }, { "epoch": 0.37, "learning_rate": 4.907149999352193e-05, "loss": 2.6092, "step": 487500 }, { "epoch": 0.37, "learning_rate": 4.907054733602669e-05, "loss": 2.635, "step": 488000 }, { "epoch": 0.37, "learning_rate": 4.906959467853144e-05, "loss": 2.6102, "step": 488500 }, { "epoch": 0.37, "learning_rate": 4.906864202103621e-05, "loss": 2.5923, "step": 489000 }, { "epoch": 0.37, "learning_rate": 4.9067689363540965e-05, "loss": 2.6081, "step": 489500 }, { "epoch": 0.37, "learning_rate": 4.9066740516675703e-05, "loss": 2.5921, "step": 490000 }, { "epoch": 0.37, "learning_rate": 4.9065787859180455e-05, "loss": 2.6144, "step": 490500 }, { "epoch": 0.37, "learning_rate": 4.906483520168521e-05, "loss": 2.6352, "step": 491000 }, { "epoch": 0.37, "learning_rate": 4.906388254418997e-05, "loss": 2.5999, "step": 491500 }, { "epoch": 0.37, "learning_rate": 4.906292988669473e-05, "loss": 2.6106, "step": 492000 }, { "epoch": 0.38, "learning_rate": 4.906197722919949e-05, "loss": 2.6154, "step": 492500 }, { "epoch": 0.38, "learning_rate": 4.9061028382334226e-05, "loss": 2.639, "step": 493000 }, { "epoch": 0.38, "learning_rate": 4.9060075724838985e-05, "loss": 2.5698, "step": 493500 }, { "epoch": 0.38, "learning_rate": 4.905912306734374e-05, "loss": 2.6098, "step": 494000 }, { "epoch": 0.38, "learning_rate": 4.9058170409848494e-05, "loss": 2.5431, "step": 494500 }, { "epoch": 0.38, "learning_rate": 4.905721775235326e-05, "loss": 2.6097, "step": 495000 }, { "epoch": 0.38, "learning_rate": 4.905626509485802e-05, "loss": 2.6005, "step": 495500 }, { "epoch": 0.38, "learning_rate": 4.905531434267776e-05, "loss": 2.6057, "step": 496000 }, { "epoch": 0.38, "learning_rate": 4.905436168518252e-05, "loss": 2.598, "step": 496500 }, { "epoch": 0.38, "learning_rate": 4.905340902768728e-05, "loss": 2.6414, "step": 497000 }, { "epoch": 0.38, "learning_rate": 4.9052456370192034e-05, "loss": 2.6055, "step": 497500 }, { "epoch": 0.38, "learning_rate": 4.905150561801178e-05, "loss": 2.6116, "step": 498000 }, { "epoch": 0.38, "learning_rate": 4.905055296051654e-05, "loss": 2.6138, "step": 498500 }, { "epoch": 0.38, "learning_rate": 4.90496003030213e-05, "loss": 2.5881, "step": 499000 }, { "epoch": 0.38, "learning_rate": 4.904864764552606e-05, "loss": 2.591, "step": 499500 }, { "epoch": 0.38, "learning_rate": 4.9047694988030816e-05, "loss": 2.5928, "step": 500000 }, { "epoch": 0.38, "learning_rate": 4.9046742330535574e-05, "loss": 2.5659, "step": 500500 }, { "epoch": 0.38, "learning_rate": 4.9045789673040325e-05, "loss": 2.6043, "step": 501000 }, { "epoch": 0.38, "learning_rate": 4.9044837015545084e-05, "loss": 2.5973, "step": 501500 }, { "epoch": 0.38, "learning_rate": 4.904388435804985e-05, "loss": 2.5438, "step": 502000 }, { "epoch": 0.38, "learning_rate": 4.90429317005546e-05, "loss": 2.6249, "step": 502500 }, { "epoch": 0.38, "learning_rate": 4.904198094837435e-05, "loss": 2.5776, "step": 503000 }, { "epoch": 0.38, "learning_rate": 4.904102829087911e-05, "loss": 2.602, "step": 503500 }, { "epoch": 0.38, "learning_rate": 4.9040075633383865e-05, "loss": 2.6077, "step": 504000 }, { "epoch": 0.38, "learning_rate": 4.9039122975888623e-05, "loss": 2.6264, "step": 504500 }, { "epoch": 0.38, "learning_rate": 4.9038170318393375e-05, "loss": 2.5949, "step": 505000 }, { "epoch": 0.39, "learning_rate": 4.903721766089814e-05, "loss": 2.583, "step": 505500 }, { "epoch": 0.39, "learning_rate": 4.903626500340289e-05, "loss": 2.6052, "step": 506000 }, { "epoch": 0.39, "learning_rate": 4.903531234590765e-05, "loss": 2.5984, "step": 506500 }, { "epoch": 0.39, "learning_rate": 4.9034359688412415e-05, "loss": 2.6108, "step": 507000 }, { "epoch": 0.39, "learning_rate": 4.9033407030917166e-05, "loss": 2.5899, "step": 507500 }, { "epoch": 0.39, "learning_rate": 4.9032456278736915e-05, "loss": 2.6324, "step": 508000 }, { "epoch": 0.39, "learning_rate": 4.903150362124167e-05, "loss": 2.5811, "step": 508500 }, { "epoch": 0.39, "learning_rate": 4.903055096374643e-05, "loss": 2.5767, "step": 509000 }, { "epoch": 0.39, "learning_rate": 4.902959830625119e-05, "loss": 2.6209, "step": 509500 }, { "epoch": 0.39, "learning_rate": 4.902864564875594e-05, "loss": 2.5892, "step": 510000 }, { "epoch": 0.39, "eval_accuracy": 0.5239542630855191, "eval_loss": 2.527653455734253, "eval_runtime": 9416.2978, "eval_samples_per_second": 29.203, "eval_steps_per_second": 7.301, "step": 510000 }, { "epoch": 0.39, "learning_rate": 4.9027692991260706e-05, "loss": 2.5818, "step": 510500 }, { "epoch": 0.39, "learning_rate": 4.9026742239080454e-05, "loss": 2.5787, "step": 511000 }, { "epoch": 0.39, "learning_rate": 4.9025789581585206e-05, "loss": 2.598, "step": 511500 }, { "epoch": 0.39, "learning_rate": 4.9024836924089964e-05, "loss": 2.5852, "step": 512000 }, { "epoch": 0.39, "learning_rate": 4.902388426659472e-05, "loss": 2.5897, "step": 512500 }, { "epoch": 0.39, "learning_rate": 4.902293160909948e-05, "loss": 2.6276, "step": 513000 }, { "epoch": 0.39, "learning_rate": 4.902197895160424e-05, "loss": 2.6128, "step": 513500 }, { "epoch": 0.39, "learning_rate": 4.9021026294109e-05, "loss": 2.6022, "step": 514000 }, { "epoch": 0.39, "learning_rate": 4.9020075541928746e-05, "loss": 2.592, "step": 514500 }, { "epoch": 0.39, "learning_rate": 4.9019122884433504e-05, "loss": 2.6054, "step": 515000 }, { "epoch": 0.39, "learning_rate": 4.901817022693826e-05, "loss": 2.5991, "step": 515500 }, { "epoch": 0.39, "learning_rate": 4.901721947475801e-05, "loss": 2.6033, "step": 516000 }, { "epoch": 0.39, "learning_rate": 4.901626872257776e-05, "loss": 2.5981, "step": 516500 }, { "epoch": 0.39, "learning_rate": 4.901531606508251e-05, "loss": 2.6045, "step": 517000 }, { "epoch": 0.39, "learning_rate": 4.901436340758727e-05, "loss": 2.5973, "step": 517500 }, { "epoch": 0.39, "learning_rate": 4.9013410750092034e-05, "loss": 2.5758, "step": 518000 }, { "epoch": 0.4, "learning_rate": 4.9012458092596785e-05, "loss": 2.6193, "step": 518500 }, { "epoch": 0.4, "learning_rate": 4.9011505435101543e-05, "loss": 2.591, "step": 519000 }, { "epoch": 0.4, "learning_rate": 4.90105527776063e-05, "loss": 2.5944, "step": 519500 }, { "epoch": 0.4, "learning_rate": 4.900960012011106e-05, "loss": 2.5772, "step": 520000 }, { "epoch": 0.4, "learning_rate": 4.900864746261581e-05, "loss": 2.6243, "step": 520500 }, { "epoch": 0.4, "learning_rate": 4.9007694805120577e-05, "loss": 2.6039, "step": 521000 }, { "epoch": 0.4, "learning_rate": 4.9006744052940325e-05, "loss": 2.5787, "step": 521500 }, { "epoch": 0.4, "learning_rate": 4.9005791395445076e-05, "loss": 2.5598, "step": 522000 }, { "epoch": 0.4, "learning_rate": 4.9004838737949835e-05, "loss": 2.6046, "step": 522500 }, { "epoch": 0.4, "learning_rate": 4.90038860804546e-05, "loss": 2.5995, "step": 523000 }, { "epoch": 0.4, "learning_rate": 4.900293342295935e-05, "loss": 2.612, "step": 523500 }, { "epoch": 0.4, "learning_rate": 4.900198076546411e-05, "loss": 2.5767, "step": 524000 }, { "epoch": 0.4, "learning_rate": 4.900102810796887e-05, "loss": 2.5863, "step": 524500 }, { "epoch": 0.4, "learning_rate": 4.9000075450473626e-05, "loss": 2.5699, "step": 525000 }, { "epoch": 0.4, "learning_rate": 4.8999122792978384e-05, "loss": 2.5862, "step": 525500 }, { "epoch": 0.4, "learning_rate": 4.899817204079813e-05, "loss": 2.5888, "step": 526000 }, { "epoch": 0.4, "learning_rate": 4.899722128861788e-05, "loss": 2.5789, "step": 526500 }, { "epoch": 0.4, "learning_rate": 4.899626863112264e-05, "loss": 2.5935, "step": 527000 }, { "epoch": 0.4, "learning_rate": 4.899531597362739e-05, "loss": 2.5866, "step": 527500 }, { "epoch": 0.4, "learning_rate": 4.8994363316132156e-05, "loss": 2.5645, "step": 528000 }, { "epoch": 0.4, "learning_rate": 4.899341065863691e-05, "loss": 2.587, "step": 528500 }, { "epoch": 0.4, "learning_rate": 4.8992458001141666e-05, "loss": 2.5689, "step": 529000 }, { "epoch": 0.4, "learning_rate": 4.8991505343646424e-05, "loss": 2.5819, "step": 529500 }, { "epoch": 0.4, "learning_rate": 4.899055268615118e-05, "loss": 2.6145, "step": 530000 }, { "epoch": 0.4, "learning_rate": 4.898960002865594e-05, "loss": 2.5692, "step": 530500 }, { "epoch": 0.4, "learning_rate": 4.898864927647569e-05, "loss": 2.5872, "step": 531000 }, { "epoch": 0.41, "learning_rate": 4.898769661898045e-05, "loss": 2.593, "step": 531500 }, { "epoch": 0.41, "learning_rate": 4.8986743961485205e-05, "loss": 2.5856, "step": 532000 }, { "epoch": 0.41, "learning_rate": 4.898579130398996e-05, "loss": 2.5836, "step": 532500 }, { "epoch": 0.41, "learning_rate": 4.8984838646494715e-05, "loss": 2.6089, "step": 533000 }, { "epoch": 0.41, "learning_rate": 4.898388598899948e-05, "loss": 2.5723, "step": 533500 }, { "epoch": 0.41, "learning_rate": 4.898293333150423e-05, "loss": 2.5817, "step": 534000 }, { "epoch": 0.41, "learning_rate": 4.898198067400899e-05, "loss": 2.5881, "step": 534500 }, { "epoch": 0.41, "learning_rate": 4.898102801651375e-05, "loss": 2.5845, "step": 535000 }, { "epoch": 0.41, "learning_rate": 4.8980077264333497e-05, "loss": 2.6131, "step": 535500 }, { "epoch": 0.41, "learning_rate": 4.8979124606838255e-05, "loss": 2.5602, "step": 536000 }, { "epoch": 0.41, "learning_rate": 4.897817194934301e-05, "loss": 2.5914, "step": 536500 }, { "epoch": 0.41, "learning_rate": 4.897721929184777e-05, "loss": 2.6114, "step": 537000 }, { "epoch": 0.41, "learning_rate": 4.897626853966752e-05, "loss": 2.5844, "step": 537500 }, { "epoch": 0.41, "learning_rate": 4.897531588217227e-05, "loss": 2.5795, "step": 538000 }, { "epoch": 0.41, "learning_rate": 4.8974363224677036e-05, "loss": 2.5649, "step": 538500 }, { "epoch": 0.41, "learning_rate": 4.897341056718179e-05, "loss": 2.5521, "step": 539000 }, { "epoch": 0.41, "learning_rate": 4.8972457909686546e-05, "loss": 2.567, "step": 539500 }, { "epoch": 0.41, "learning_rate": 4.8971505252191304e-05, "loss": 2.5747, "step": 540000 }, { "epoch": 0.41, "eval_accuracy": 0.5256214468279793, "eval_loss": 2.5165464878082275, "eval_runtime": 9416.0522, "eval_samples_per_second": 29.204, "eval_steps_per_second": 7.301, "step": 540000 }, { "epoch": 0.41, "learning_rate": 4.897055259469606e-05, "loss": 2.5886, "step": 540500 }, { "epoch": 0.41, "learning_rate": 4.896959993720082e-05, "loss": 2.5949, "step": 541000 }, { "epoch": 0.41, "learning_rate": 4.896864727970558e-05, "loss": 2.5847, "step": 541500 }, { "epoch": 0.41, "learning_rate": 4.896769462221034e-05, "loss": 2.5993, "step": 542000 }, { "epoch": 0.41, "learning_rate": 4.8966745775345076e-05, "loss": 2.5967, "step": 542500 }, { "epoch": 0.41, "learning_rate": 4.8965793117849834e-05, "loss": 2.5327, "step": 543000 }, { "epoch": 0.41, "learning_rate": 4.8964840460354586e-05, "loss": 2.5844, "step": 543500 }, { "epoch": 0.41, "learning_rate": 4.896388780285935e-05, "loss": 2.5667, "step": 544000 }, { "epoch": 0.41, "learning_rate": 4.89629351453641e-05, "loss": 2.5514, "step": 544500 }, { "epoch": 0.42, "learning_rate": 4.896198248786886e-05, "loss": 2.5898, "step": 545000 }, { "epoch": 0.42, "learning_rate": 4.896103173568861e-05, "loss": 2.6, "step": 545500 }, { "epoch": 0.42, "learning_rate": 4.896007907819337e-05, "loss": 2.5687, "step": 546000 }, { "epoch": 0.42, "learning_rate": 4.8959126420698125e-05, "loss": 2.5999, "step": 546500 }, { "epoch": 0.42, "learning_rate": 4.8958173763202884e-05, "loss": 2.5746, "step": 547000 }, { "epoch": 0.42, "learning_rate": 4.895722110570764e-05, "loss": 2.5982, "step": 547500 }, { "epoch": 0.42, "learning_rate": 4.895627035352739e-05, "loss": 2.6103, "step": 548000 }, { "epoch": 0.42, "learning_rate": 4.895531960134714e-05, "loss": 2.569, "step": 548500 }, { "epoch": 0.42, "learning_rate": 4.895436694385189e-05, "loss": 2.5567, "step": 549000 }, { "epoch": 0.42, "learning_rate": 4.8953414286356655e-05, "loss": 2.5987, "step": 549500 }, { "epoch": 0.42, "learning_rate": 4.8952461628861407e-05, "loss": 2.592, "step": 550000 }, { "epoch": 0.42, "learning_rate": 4.8951508971366165e-05, "loss": 2.5926, "step": 550500 }, { "epoch": 0.42, "learning_rate": 4.895055631387093e-05, "loss": 2.5759, "step": 551000 }, { "epoch": 0.42, "learning_rate": 4.894960556169067e-05, "loss": 2.5833, "step": 551500 }, { "epoch": 0.42, "learning_rate": 4.894865290419543e-05, "loss": 2.5767, "step": 552000 }, { "epoch": 0.42, "learning_rate": 4.894770024670019e-05, "loss": 2.5764, "step": 552500 }, { "epoch": 0.42, "learning_rate": 4.8946747589204946e-05, "loss": 2.5596, "step": 553000 }, { "epoch": 0.42, "learning_rate": 4.8945794931709705e-05, "loss": 2.5815, "step": 553500 }, { "epoch": 0.42, "learning_rate": 4.894484417952945e-05, "loss": 2.6082, "step": 554000 }, { "epoch": 0.42, "learning_rate": 4.8943893427349194e-05, "loss": 2.5899, "step": 554500 }, { "epoch": 0.42, "learning_rate": 4.894294076985396e-05, "loss": 2.5846, "step": 555000 }, { "epoch": 0.42, "learning_rate": 4.894198811235871e-05, "loss": 2.5914, "step": 555500 }, { "epoch": 0.42, "learning_rate": 4.894103545486347e-05, "loss": 2.5745, "step": 556000 }, { "epoch": 0.42, "learning_rate": 4.894008470268322e-05, "loss": 2.5803, "step": 556500 }, { "epoch": 0.42, "learning_rate": 4.8939132045187976e-05, "loss": 2.5613, "step": 557000 }, { "epoch": 0.42, "learning_rate": 4.8938179387692734e-05, "loss": 2.6123, "step": 557500 }, { "epoch": 0.43, "learning_rate": 4.893722673019749e-05, "loss": 2.5578, "step": 558000 }, { "epoch": 0.43, "learning_rate": 4.893627407270225e-05, "loss": 2.57, "step": 558500 }, { "epoch": 0.43, "learning_rate": 4.893532141520701e-05, "loss": 2.5602, "step": 559000 }, { "epoch": 0.43, "learning_rate": 4.893436875771176e-05, "loss": 2.593, "step": 559500 }, { "epoch": 0.43, "learning_rate": 4.8933416100216525e-05, "loss": 2.577, "step": 560000 }, { "epoch": 0.43, "learning_rate": 4.8932463442721284e-05, "loss": 2.5641, "step": 560500 }, { "epoch": 0.43, "learning_rate": 4.8931510785226035e-05, "loss": 2.5776, "step": 561000 }, { "epoch": 0.43, "learning_rate": 4.8930558127730794e-05, "loss": 2.5898, "step": 561500 }, { "epoch": 0.43, "learning_rate": 4.892960547023555e-05, "loss": 2.5663, "step": 562000 }, { "epoch": 0.43, "learning_rate": 4.89286547180553e-05, "loss": 2.5632, "step": 562500 }, { "epoch": 0.43, "learning_rate": 4.892770206056006e-05, "loss": 2.5737, "step": 563000 }, { "epoch": 0.43, "learning_rate": 4.892674940306482e-05, "loss": 2.5655, "step": 563500 }, { "epoch": 0.43, "learning_rate": 4.8925796745569575e-05, "loss": 2.5296, "step": 564000 }, { "epoch": 0.43, "learning_rate": 4.892484599338932e-05, "loss": 2.5813, "step": 564500 }, { "epoch": 0.43, "learning_rate": 4.8923893335894075e-05, "loss": 2.5744, "step": 565000 }, { "epoch": 0.43, "learning_rate": 4.892294258371383e-05, "loss": 2.5764, "step": 565500 }, { "epoch": 0.43, "learning_rate": 4.892198992621859e-05, "loss": 2.5807, "step": 566000 }, { "epoch": 0.43, "learning_rate": 4.892103726872334e-05, "loss": 2.5906, "step": 566500 }, { "epoch": 0.43, "learning_rate": 4.89200846112281e-05, "loss": 2.5214, "step": 567000 }, { "epoch": 0.43, "learning_rate": 4.8919131953732856e-05, "loss": 2.5861, "step": 567500 }, { "epoch": 0.43, "learning_rate": 4.8918179296237615e-05, "loss": 2.5611, "step": 568000 }, { "epoch": 0.43, "learning_rate": 4.891722663874237e-05, "loss": 2.5639, "step": 568500 }, { "epoch": 0.43, "learning_rate": 4.891627398124713e-05, "loss": 2.5643, "step": 569000 }, { "epoch": 0.43, "learning_rate": 4.891532132375189e-05, "loss": 2.5686, "step": 569500 }, { "epoch": 0.43, "learning_rate": 4.891436866625664e-05, "loss": 2.5763, "step": 570000 }, { "epoch": 0.43, "eval_accuracy": 0.5270845587489317, "eval_loss": 2.505284547805786, "eval_runtime": 9432.2342, "eval_samples_per_second": 29.154, "eval_steps_per_second": 7.289, "step": 570000 }, { "epoch": 0.43, "learning_rate": 4.8913416008761406e-05, "loss": 2.5246, "step": 570500 }, { "epoch": 0.44, "learning_rate": 4.891246335126616e-05, "loss": 2.5753, "step": 571000 }, { "epoch": 0.44, "learning_rate": 4.8911510693770916e-05, "loss": 2.585, "step": 571500 }, { "epoch": 0.44, "learning_rate": 4.8910559941590664e-05, "loss": 2.5768, "step": 572000 }, { "epoch": 0.44, "learning_rate": 4.890960728409542e-05, "loss": 2.6062, "step": 572500 }, { "epoch": 0.44, "learning_rate": 4.890865462660018e-05, "loss": 2.5534, "step": 573000 }, { "epoch": 0.44, "learning_rate": 4.890770387441993e-05, "loss": 2.5247, "step": 573500 }, { "epoch": 0.44, "learning_rate": 4.890675121692469e-05, "loss": 2.5756, "step": 574000 }, { "epoch": 0.44, "learning_rate": 4.8905798559429445e-05, "loss": 2.5494, "step": 574500 }, { "epoch": 0.44, "learning_rate": 4.8904847807249194e-05, "loss": 2.5725, "step": 575000 }, { "epoch": 0.44, "learning_rate": 4.8903895149753945e-05, "loss": 2.5579, "step": 575500 }, { "epoch": 0.44, "learning_rate": 4.890294249225871e-05, "loss": 2.5732, "step": 576000 }, { "epoch": 0.44, "learning_rate": 4.890198983476347e-05, "loss": 2.5871, "step": 576500 }, { "epoch": 0.44, "learning_rate": 4.890103717726822e-05, "loss": 2.5768, "step": 577000 }, { "epoch": 0.44, "learning_rate": 4.890008642508797e-05, "loss": 2.5641, "step": 577500 }, { "epoch": 0.44, "learning_rate": 4.8899133767592733e-05, "loss": 2.5499, "step": 578000 }, { "epoch": 0.44, "learning_rate": 4.8898181110097485e-05, "loss": 2.5572, "step": 578500 }, { "epoch": 0.44, "learning_rate": 4.889722845260224e-05, "loss": 2.5845, "step": 579000 }, { "epoch": 0.44, "learning_rate": 4.8896275795107e-05, "loss": 2.539, "step": 579500 }, { "epoch": 0.44, "learning_rate": 4.889532313761176e-05, "loss": 2.5803, "step": 580000 }, { "epoch": 0.44, "learning_rate": 4.889437048011651e-05, "loss": 2.601, "step": 580500 }, { "epoch": 0.44, "learning_rate": 4.8893417822621276e-05, "loss": 2.5539, "step": 581000 }, { "epoch": 0.44, "learning_rate": 4.8892465165126035e-05, "loss": 2.5432, "step": 581500 }, { "epoch": 0.44, "learning_rate": 4.8891514412945776e-05, "loss": 2.5968, "step": 582000 }, { "epoch": 0.44, "learning_rate": 4.8890561755450535e-05, "loss": 2.5454, "step": 582500 }, { "epoch": 0.44, "learning_rate": 4.88896090979553e-05, "loss": 2.5819, "step": 583000 }, { "epoch": 0.44, "learning_rate": 4.888865644046005e-05, "loss": 2.5688, "step": 583500 }, { "epoch": 0.45, "learning_rate": 4.888770378296481e-05, "loss": 2.5972, "step": 584000 }, { "epoch": 0.45, "learning_rate": 4.888675303078456e-05, "loss": 2.5718, "step": 584500 }, { "epoch": 0.45, "learning_rate": 4.8885800373289316e-05, "loss": 2.5602, "step": 585000 }, { "epoch": 0.45, "learning_rate": 4.8884847715794074e-05, "loss": 2.5643, "step": 585500 }, { "epoch": 0.45, "learning_rate": 4.8883895058298826e-05, "loss": 2.5745, "step": 586000 }, { "epoch": 0.45, "learning_rate": 4.888294240080359e-05, "loss": 2.5657, "step": 586500 }, { "epoch": 0.45, "learning_rate": 4.888198974330834e-05, "loss": 2.5658, "step": 587000 }, { "epoch": 0.45, "learning_rate": 4.88810370858131e-05, "loss": 2.5612, "step": 587500 }, { "epoch": 0.45, "learning_rate": 4.8880084428317866e-05, "loss": 2.5861, "step": 588000 }, { "epoch": 0.45, "learning_rate": 4.887913367613761e-05, "loss": 2.5615, "step": 588500 }, { "epoch": 0.45, "learning_rate": 4.8878181018642365e-05, "loss": 2.5519, "step": 589000 }, { "epoch": 0.45, "learning_rate": 4.8877228361147124e-05, "loss": 2.5352, "step": 589500 }, { "epoch": 0.45, "learning_rate": 4.887627760896687e-05, "loss": 2.6373, "step": 590000 }, { "epoch": 0.45, "learning_rate": 4.887532495147163e-05, "loss": 2.5981, "step": 590500 }, { "epoch": 0.45, "learning_rate": 4.887437229397639e-05, "loss": 2.5953, "step": 591000 }, { "epoch": 0.45, "learning_rate": 4.887341963648115e-05, "loss": 2.5567, "step": 591500 }, { "epoch": 0.45, "learning_rate": 4.8872466978985905e-05, "loss": 2.5826, "step": 592000 }, { "epoch": 0.45, "learning_rate": 4.8871516226805653e-05, "loss": 2.5662, "step": 592500 }, { "epoch": 0.45, "learning_rate": 4.8870563569310405e-05, "loss": 2.582, "step": 593000 }, { "epoch": 0.45, "learning_rate": 4.886961091181517e-05, "loss": 2.5929, "step": 593500 }, { "epoch": 0.45, "learning_rate": 4.886866015963492e-05, "loss": 2.5374, "step": 594000 }, { "epoch": 0.45, "learning_rate": 4.886770750213967e-05, "loss": 2.5856, "step": 594500 }, { "epoch": 0.45, "learning_rate": 4.886675484464443e-05, "loss": 2.5694, "step": 595000 }, { "epoch": 0.45, "learning_rate": 4.8865802187149186e-05, "loss": 2.5857, "step": 595500 }, { "epoch": 0.45, "learning_rate": 4.8864849529653945e-05, "loss": 2.5597, "step": 596000 }, { "epoch": 0.45, "learning_rate": 4.886389877747369e-05, "loss": 2.5636, "step": 596500 }, { "epoch": 0.45, "learning_rate": 4.886294802529344e-05, "loss": 2.5573, "step": 597000 }, { "epoch": 0.46, "learning_rate": 4.88619953677982e-05, "loss": 2.5692, "step": 597500 }, { "epoch": 0.46, "learning_rate": 4.886104271030296e-05, "loss": 2.5766, "step": 598000 }, { "epoch": 0.46, "learning_rate": 4.886009005280771e-05, "loss": 2.5503, "step": 598500 }, { "epoch": 0.46, "learning_rate": 4.885913739531247e-05, "loss": 2.5367, "step": 599000 }, { "epoch": 0.46, "learning_rate": 4.8858184737817226e-05, "loss": 2.5407, "step": 599500 }, { "epoch": 0.46, "learning_rate": 4.8857232080321984e-05, "loss": 2.5422, "step": 600000 }, { "epoch": 0.46, "eval_accuracy": 0.5284035143749843, "eval_loss": 2.4944164752960205, "eval_runtime": 9408.0544, "eval_samples_per_second": 29.229, "eval_steps_per_second": 7.307, "step": 600000 }, { "epoch": 0.46, "learning_rate": 4.885627942282674e-05, "loss": 2.5282, "step": 600500 }, { "epoch": 0.46, "learning_rate": 4.88553267653315e-05, "loss": 2.5668, "step": 601000 }, { "epoch": 0.46, "learning_rate": 4.885437410783626e-05, "loss": 2.5304, "step": 601500 }, { "epoch": 0.46, "learning_rate": 4.885342145034101e-05, "loss": 2.572, "step": 602000 }, { "epoch": 0.46, "learning_rate": 4.8852468792845776e-05, "loss": 2.5565, "step": 602500 }, { "epoch": 0.46, "learning_rate": 4.8851516135350534e-05, "loss": 2.5708, "step": 603000 }, { "epoch": 0.46, "learning_rate": 4.8850563477855285e-05, "loss": 2.5373, "step": 603500 }, { "epoch": 0.46, "learning_rate": 4.8849612725675034e-05, "loss": 2.5501, "step": 604000 }, { "epoch": 0.46, "learning_rate": 4.884866006817979e-05, "loss": 2.5333, "step": 604500 }, { "epoch": 0.46, "learning_rate": 4.884770741068455e-05, "loss": 2.5603, "step": 605000 }, { "epoch": 0.46, "learning_rate": 4.88467566585043e-05, "loss": 2.5624, "step": 605500 }, { "epoch": 0.46, "learning_rate": 4.884580590632405e-05, "loss": 2.5848, "step": 606000 }, { "epoch": 0.46, "learning_rate": 4.8844853248828805e-05, "loss": 2.5545, "step": 606500 }, { "epoch": 0.46, "learning_rate": 4.8843900591333563e-05, "loss": 2.5781, "step": 607000 }, { "epoch": 0.46, "learning_rate": 4.8842947933838315e-05, "loss": 2.5574, "step": 607500 }, { "epoch": 0.46, "learning_rate": 4.884199527634308e-05, "loss": 2.5623, "step": 608000 }, { "epoch": 0.46, "learning_rate": 4.884104261884784e-05, "loss": 2.5391, "step": 608500 }, { "epoch": 0.46, "learning_rate": 4.884008996135259e-05, "loss": 2.5471, "step": 609000 }, { "epoch": 0.46, "learning_rate": 4.8839137303857355e-05, "loss": 2.5576, "step": 609500 }, { "epoch": 0.46, "learning_rate": 4.8838184646362106e-05, "loss": 2.5849, "step": 610000 }, { "epoch": 0.47, "learning_rate": 4.8837231988866865e-05, "loss": 2.5478, "step": 610500 }, { "epoch": 0.47, "learning_rate": 4.883627933137162e-05, "loss": 2.5847, "step": 611000 }, { "epoch": 0.47, "learning_rate": 4.883532667387638e-05, "loss": 2.5785, "step": 611500 }, { "epoch": 0.47, "learning_rate": 4.883437592169613e-05, "loss": 2.5349, "step": 612000 }, { "epoch": 0.47, "learning_rate": 4.883342516951588e-05, "loss": 2.5917, "step": 612500 }, { "epoch": 0.47, "learning_rate": 4.8832472512020636e-05, "loss": 2.5535, "step": 613000 }, { "epoch": 0.47, "learning_rate": 4.8831519854525394e-05, "loss": 2.5452, "step": 613500 }, { "epoch": 0.47, "learning_rate": 4.8830567197030146e-05, "loss": 2.5837, "step": 614000 }, { "epoch": 0.47, "learning_rate": 4.8829614539534904e-05, "loss": 2.5961, "step": 614500 }, { "epoch": 0.47, "learning_rate": 4.882866188203967e-05, "loss": 2.5671, "step": 615000 }, { "epoch": 0.47, "learning_rate": 4.882770922454442e-05, "loss": 2.5642, "step": 615500 }, { "epoch": 0.47, "learning_rate": 4.882675656704918e-05, "loss": 2.5708, "step": 616000 }, { "epoch": 0.47, "learning_rate": 4.882580390955394e-05, "loss": 2.5436, "step": 616500 }, { "epoch": 0.47, "learning_rate": 4.8824853157373686e-05, "loss": 2.5578, "step": 617000 }, { "epoch": 0.47, "learning_rate": 4.8823900499878444e-05, "loss": 2.5543, "step": 617500 }, { "epoch": 0.47, "learning_rate": 4.8822947842383195e-05, "loss": 2.5486, "step": 618000 }, { "epoch": 0.47, "learning_rate": 4.882199518488796e-05, "loss": 2.5571, "step": 618500 }, { "epoch": 0.47, "learning_rate": 4.882104252739272e-05, "loss": 2.5487, "step": 619000 }, { "epoch": 0.47, "learning_rate": 4.882008986989747e-05, "loss": 2.5652, "step": 619500 }, { "epoch": 0.47, "learning_rate": 4.8819137212402235e-05, "loss": 2.5337, "step": 620000 }, { "epoch": 0.47, "learning_rate": 4.881818455490699e-05, "loss": 2.5391, "step": 620500 }, { "epoch": 0.47, "learning_rate": 4.8817231897411745e-05, "loss": 2.5752, "step": 621000 }, { "epoch": 0.47, "learning_rate": 4.8816283050546483e-05, "loss": 2.5687, "step": 621500 }, { "epoch": 0.47, "learning_rate": 4.881533039305125e-05, "loss": 2.598, "step": 622000 }, { "epoch": 0.47, "learning_rate": 4.8814377735556e-05, "loss": 2.5839, "step": 622500 }, { "epoch": 0.47, "learning_rate": 4.881342507806076e-05, "loss": 2.5837, "step": 623000 }, { "epoch": 0.48, "learning_rate": 4.8812472420565517e-05, "loss": 2.5647, "step": 623500 }, { "epoch": 0.48, "learning_rate": 4.8811521668385265e-05, "loss": 2.5452, "step": 624000 }, { "epoch": 0.48, "learning_rate": 4.881056901089002e-05, "loss": 2.551, "step": 624500 }, { "epoch": 0.48, "learning_rate": 4.8809618258709765e-05, "loss": 2.5592, "step": 625000 }, { "epoch": 0.48, "learning_rate": 4.880866560121452e-05, "loss": 2.5408, "step": 625500 }, { "epoch": 0.48, "learning_rate": 4.880771294371929e-05, "loss": 2.5772, "step": 626000 }, { "epoch": 0.48, "learning_rate": 4.880676028622404e-05, "loss": 2.5597, "step": 626500 }, { "epoch": 0.48, "learning_rate": 4.88058076287288e-05, "loss": 2.5607, "step": 627000 }, { "epoch": 0.48, "learning_rate": 4.8804854971233556e-05, "loss": 2.5403, "step": 627500 }, { "epoch": 0.48, "learning_rate": 4.8803902313738314e-05, "loss": 2.5646, "step": 628000 }, { "epoch": 0.48, "learning_rate": 4.880294965624307e-05, "loss": 2.5266, "step": 628500 }, { "epoch": 0.48, "learning_rate": 4.880199890406282e-05, "loss": 2.5615, "step": 629000 }, { "epoch": 0.48, "learning_rate": 4.880104815188257e-05, "loss": 2.5383, "step": 629500 }, { "epoch": 0.48, "learning_rate": 4.880009549438733e-05, "loss": 2.5656, "step": 630000 }, { "epoch": 0.48, "eval_accuracy": 0.5297416443914567, "eval_loss": 2.486067771911621, "eval_runtime": 9415.1025, "eval_samples_per_second": 29.207, "eval_steps_per_second": 7.302, "step": 630000 }, { "epoch": 0.48, "learning_rate": 4.879914283689208e-05, "loss": 2.5756, "step": 630500 }, { "epoch": 0.48, "learning_rate": 4.8798190179396844e-05, "loss": 2.5631, "step": 631000 }, { "epoch": 0.48, "learning_rate": 4.87972375219016e-05, "loss": 2.5694, "step": 631500 }, { "epoch": 0.48, "learning_rate": 4.8796284864406354e-05, "loss": 2.5522, "step": 632000 }, { "epoch": 0.48, "learning_rate": 4.879533220691111e-05, "loss": 2.5608, "step": 632500 }, { "epoch": 0.48, "learning_rate": 4.879437954941587e-05, "loss": 2.5621, "step": 633000 }, { "epoch": 0.48, "learning_rate": 4.879342689192063e-05, "loss": 2.5046, "step": 633500 }, { "epoch": 0.48, "learning_rate": 4.879247423442539e-05, "loss": 2.5398, "step": 634000 }, { "epoch": 0.48, "learning_rate": 4.8791521576930145e-05, "loss": 2.5526, "step": 634500 }, { "epoch": 0.48, "learning_rate": 4.8790568919434904e-05, "loss": 2.5357, "step": 635000 }, { "epoch": 0.48, "learning_rate": 4.878962007256964e-05, "loss": 2.5391, "step": 635500 }, { "epoch": 0.48, "learning_rate": 4.8788667415074393e-05, "loss": 2.5564, "step": 636000 }, { "epoch": 0.49, "learning_rate": 4.878771475757916e-05, "loss": 2.5525, "step": 636500 }, { "epoch": 0.49, "learning_rate": 4.878676210008391e-05, "loss": 2.5343, "step": 637000 }, { "epoch": 0.49, "learning_rate": 4.878580944258867e-05, "loss": 2.5426, "step": 637500 }, { "epoch": 0.49, "learning_rate": 4.878485678509343e-05, "loss": 2.5558, "step": 638000 }, { "epoch": 0.49, "learning_rate": 4.8783904127598185e-05, "loss": 2.5494, "step": 638500 }, { "epoch": 0.49, "learning_rate": 4.878295147010294e-05, "loss": 2.5582, "step": 639000 }, { "epoch": 0.49, "learning_rate": 4.87819988126077e-05, "loss": 2.565, "step": 639500 }, { "epoch": 0.49, "learning_rate": 4.878104806042745e-05, "loss": 2.5377, "step": 640000 }, { "epoch": 0.49, "learning_rate": 4.878009540293221e-05, "loss": 2.5619, "step": 640500 }, { "epoch": 0.49, "learning_rate": 4.877914274543696e-05, "loss": 2.5582, "step": 641000 }, { "epoch": 0.49, "learning_rate": 4.8778190087941725e-05, "loss": 2.541, "step": 641500 }, { "epoch": 0.49, "learning_rate": 4.877723933576147e-05, "loss": 2.5546, "step": 642000 }, { "epoch": 0.49, "learning_rate": 4.8776288583581214e-05, "loss": 2.5695, "step": 642500 }, { "epoch": 0.49, "learning_rate": 4.877533592608597e-05, "loss": 2.5482, "step": 643000 }, { "epoch": 0.49, "learning_rate": 4.877438517390572e-05, "loss": 2.5599, "step": 643500 }, { "epoch": 0.49, "learning_rate": 4.877343251641048e-05, "loss": 2.5551, "step": 644000 }, { "epoch": 0.49, "learning_rate": 4.877247985891524e-05, "loss": 2.5748, "step": 644500 }, { "epoch": 0.49, "learning_rate": 4.8771527201419996e-05, "loss": 2.5434, "step": 645000 }, { "epoch": 0.49, "learning_rate": 4.8770574543924754e-05, "loss": 2.5653, "step": 645500 }, { "epoch": 0.49, "learning_rate": 4.876962188642951e-05, "loss": 2.5685, "step": 646000 }, { "epoch": 0.49, "learning_rate": 4.8768669228934264e-05, "loss": 2.5399, "step": 646500 }, { "epoch": 0.49, "learning_rate": 4.876771657143903e-05, "loss": 2.5867, "step": 647000 }, { "epoch": 0.49, "learning_rate": 4.876676391394379e-05, "loss": 2.5336, "step": 647500 }, { "epoch": 0.49, "learning_rate": 4.876581316176353e-05, "loss": 2.5478, "step": 648000 }, { "epoch": 0.49, "learning_rate": 4.876486050426829e-05, "loss": 2.5523, "step": 648500 }, { "epoch": 0.49, "learning_rate": 4.876390784677305e-05, "loss": 2.5737, "step": 649000 }, { "epoch": 0.5, "learning_rate": 4.8762957094592794e-05, "loss": 2.5375, "step": 649500 }, { "epoch": 0.5, "learning_rate": 4.876200443709755e-05, "loss": 2.5383, "step": 650000 }, { "epoch": 0.5, "learning_rate": 4.876105177960231e-05, "loss": 2.5543, "step": 650500 }, { "epoch": 0.5, "learning_rate": 4.876009912210707e-05, "loss": 2.5613, "step": 651000 }, { "epoch": 0.5, "learning_rate": 4.875914646461183e-05, "loss": 2.567, "step": 651500 }, { "epoch": 0.5, "learning_rate": 4.875819380711658e-05, "loss": 2.5231, "step": 652000 }, { "epoch": 0.5, "learning_rate": 4.875724114962134e-05, "loss": 2.4986, "step": 652500 }, { "epoch": 0.5, "learning_rate": 4.8756288492126095e-05, "loss": 2.5637, "step": 653000 }, { "epoch": 0.5, "learning_rate": 4.875533583463085e-05, "loss": 2.5403, "step": 653500 }, { "epoch": 0.5, "learning_rate": 4.875438317713562e-05, "loss": 2.529, "step": 654000 }, { "epoch": 0.5, "learning_rate": 4.875343242495536e-05, "loss": 2.5484, "step": 654500 }, { "epoch": 0.5, "learning_rate": 4.875247976746012e-05, "loss": 2.5776, "step": 655000 }, { "epoch": 0.5, "learning_rate": 4.8751527109964876e-05, "loss": 2.5572, "step": 655500 }, { "epoch": 0.5, "learning_rate": 4.8750574452469635e-05, "loss": 2.5701, "step": 656000 }, { "epoch": 0.5, "learning_rate": 4.874962179497439e-05, "loss": 2.5078, "step": 656500 }, { "epoch": 0.5, "learning_rate": 4.8748669137479144e-05, "loss": 2.5622, "step": 657000 }, { "epoch": 0.5, "learning_rate": 4.874771647998391e-05, "loss": 2.5526, "step": 657500 }, { "epoch": 0.5, "learning_rate": 4.874676382248866e-05, "loss": 2.5706, "step": 658000 }, { "epoch": 0.5, "learning_rate": 4.874581116499342e-05, "loss": 2.5637, "step": 658500 }, { "epoch": 0.5, "learning_rate": 4.874486041281317e-05, "loss": 2.5495, "step": 659000 }, { "epoch": 0.5, "learning_rate": 4.874390966063292e-05, "loss": 2.5396, "step": 659500 }, { "epoch": 0.5, "learning_rate": 4.8742957003137674e-05, "loss": 2.5344, "step": 660000 }, { "epoch": 0.5, "eval_accuracy": 0.5306867278424278, "eval_loss": 2.4773244857788086, "eval_runtime": 9425.3328, "eval_samples_per_second": 29.175, "eval_steps_per_second": 7.294, "step": 660000 }, { "epoch": 0.5, "learning_rate": 4.874200434564243e-05, "loss": 2.5531, "step": 660500 }, { "epoch": 0.5, "learning_rate": 4.874105168814719e-05, "loss": 2.5221, "step": 661000 }, { "epoch": 0.5, "learning_rate": 4.874009903065195e-05, "loss": 2.59, "step": 661500 }, { "epoch": 0.5, "learning_rate": 4.873914637315671e-05, "loss": 2.567, "step": 662000 }, { "epoch": 0.5, "learning_rate": 4.873819562097645e-05, "loss": 2.5654, "step": 662500 }, { "epoch": 0.51, "learning_rate": 4.8737242963481214e-05, "loss": 2.5486, "step": 663000 }, { "epoch": 0.51, "learning_rate": 4.873629030598597e-05, "loss": 2.5288, "step": 663500 }, { "epoch": 0.51, "learning_rate": 4.8735337648490724e-05, "loss": 2.5396, "step": 664000 }, { "epoch": 0.51, "learning_rate": 4.873438499099549e-05, "loss": 2.5294, "step": 664500 }, { "epoch": 0.51, "learning_rate": 4.873343423881524e-05, "loss": 2.5545, "step": 665000 }, { "epoch": 0.51, "learning_rate": 4.873248158131999e-05, "loss": 2.5236, "step": 665500 }, { "epoch": 0.51, "learning_rate": 4.873152892382475e-05, "loss": 2.524, "step": 666000 }, { "epoch": 0.51, "learning_rate": 4.8730576266329505e-05, "loss": 2.5508, "step": 666500 }, { "epoch": 0.51, "learning_rate": 4.872962360883426e-05, "loss": 2.548, "step": 667000 }, { "epoch": 0.51, "learning_rate": 4.8728670951339015e-05, "loss": 2.552, "step": 667500 }, { "epoch": 0.51, "learning_rate": 4.872772019915876e-05, "loss": 2.5529, "step": 668000 }, { "epoch": 0.51, "learning_rate": 4.872676754166353e-05, "loss": 2.4969, "step": 668500 }, { "epoch": 0.51, "learning_rate": 4.8725816789483276e-05, "loss": 2.5471, "step": 669000 }, { "epoch": 0.51, "learning_rate": 4.872486413198803e-05, "loss": 2.5424, "step": 669500 }, { "epoch": 0.51, "learning_rate": 4.8723911474492786e-05, "loss": 2.5373, "step": 670000 }, { "epoch": 0.51, "learning_rate": 4.8722958816997545e-05, "loss": 2.5443, "step": 670500 }, { "epoch": 0.51, "learning_rate": 4.87220061595023e-05, "loss": 2.5299, "step": 671000 }, { "epoch": 0.51, "learning_rate": 4.872105540732205e-05, "loss": 2.5615, "step": 671500 }, { "epoch": 0.51, "learning_rate": 4.872010274982681e-05, "loss": 2.5374, "step": 672000 }, { "epoch": 0.51, "learning_rate": 4.871915009233157e-05, "loss": 2.5591, "step": 672500 }, { "epoch": 0.51, "learning_rate": 4.8718197434836326e-05, "loss": 2.5736, "step": 673000 }, { "epoch": 0.51, "learning_rate": 4.8717244777341084e-05, "loss": 2.5431, "step": 673500 }, { "epoch": 0.51, "learning_rate": 4.871629211984584e-05, "loss": 2.5586, "step": 674000 }, { "epoch": 0.51, "learning_rate": 4.8715339462350594e-05, "loss": 2.5191, "step": 674500 }, { "epoch": 0.51, "learning_rate": 4.871438680485535e-05, "loss": 2.5578, "step": 675000 }, { "epoch": 0.51, "learning_rate": 4.871343605267511e-05, "loss": 2.5528, "step": 675500 }, { "epoch": 0.52, "learning_rate": 4.871248339517986e-05, "loss": 2.5669, "step": 676000 }, { "epoch": 0.52, "learning_rate": 4.871153073768462e-05, "loss": 2.5569, "step": 676500 }, { "epoch": 0.52, "learning_rate": 4.8710578080189375e-05, "loss": 2.5039, "step": 677000 }, { "epoch": 0.52, "learning_rate": 4.8709625422694134e-05, "loss": 2.5063, "step": 677500 }, { "epoch": 0.52, "learning_rate": 4.870867276519889e-05, "loss": 2.5292, "step": 678000 }, { "epoch": 0.52, "learning_rate": 4.870772010770365e-05, "loss": 2.5737, "step": 678500 }, { "epoch": 0.52, "learning_rate": 4.870676745020841e-05, "loss": 2.5232, "step": 679000 }, { "epoch": 0.52, "learning_rate": 4.870581479271316e-05, "loss": 2.5348, "step": 679500 }, { "epoch": 0.52, "learning_rate": 4.870486404053291e-05, "loss": 2.5561, "step": 680000 }, { "epoch": 0.52, "learning_rate": 4.8703911383037673e-05, "loss": 2.5313, "step": 680500 }, { "epoch": 0.52, "learning_rate": 4.870296063085742e-05, "loss": 2.573, "step": 681000 }, { "epoch": 0.52, "learning_rate": 4.870200797336217e-05, "loss": 2.5602, "step": 681500 }, { "epoch": 0.52, "learning_rate": 4.870105531586693e-05, "loss": 2.513, "step": 682000 }, { "epoch": 0.52, "learning_rate": 4.870010265837169e-05, "loss": 2.5733, "step": 682500 }, { "epoch": 0.52, "learning_rate": 4.869915190619144e-05, "loss": 2.5505, "step": 683000 }, { "epoch": 0.52, "learning_rate": 4.8698199248696196e-05, "loss": 2.5238, "step": 683500 }, { "epoch": 0.52, "learning_rate": 4.869724659120095e-05, "loss": 2.5421, "step": 684000 }, { "epoch": 0.52, "learning_rate": 4.869629393370571e-05, "loss": 2.5464, "step": 684500 }, { "epoch": 0.52, "learning_rate": 4.869534318152546e-05, "loss": 2.5229, "step": 685000 }, { "epoch": 0.52, "learning_rate": 4.869439052403021e-05, "loss": 2.527, "step": 685500 }, { "epoch": 0.52, "learning_rate": 4.869343977184996e-05, "loss": 2.5014, "step": 686000 }, { "epoch": 0.52, "learning_rate": 4.8692487114354726e-05, "loss": 2.5508, "step": 686500 }, { "epoch": 0.52, "learning_rate": 4.869153445685948e-05, "loss": 2.5354, "step": 687000 }, { "epoch": 0.52, "learning_rate": 4.8690581799364236e-05, "loss": 2.5611, "step": 687500 }, { "epoch": 0.52, "learning_rate": 4.8689629141868994e-05, "loss": 2.5014, "step": 688000 }, { "epoch": 0.52, "learning_rate": 4.868867648437375e-05, "loss": 2.5469, "step": 688500 }, { "epoch": 0.53, "learning_rate": 4.86877257321935e-05, "loss": 2.5727, "step": 689000 }, { "epoch": 0.53, "learning_rate": 4.868677307469825e-05, "loss": 2.5306, "step": 689500 }, { "epoch": 0.53, "learning_rate": 4.868582041720302e-05, "loss": 2.5202, "step": 690000 }, { "epoch": 0.53, "eval_accuracy": 0.532116129947965, "eval_loss": 2.4686408042907715, "eval_runtime": 9410.3379, "eval_samples_per_second": 29.222, "eval_steps_per_second": 7.305, "step": 690000 }, { "epoch": 0.53, "learning_rate": 4.8684867759707776e-05, "loss": 2.5555, "step": 690500 }, { "epoch": 0.53, "learning_rate": 4.868391700752752e-05, "loss": 2.5307, "step": 691000 }, { "epoch": 0.53, "learning_rate": 4.8682964350032275e-05, "loss": 2.5264, "step": 691500 }, { "epoch": 0.53, "learning_rate": 4.868201169253704e-05, "loss": 2.5466, "step": 692000 }, { "epoch": 0.53, "learning_rate": 4.868105903504179e-05, "loss": 2.5391, "step": 692500 }, { "epoch": 0.53, "learning_rate": 4.868010637754655e-05, "loss": 2.5694, "step": 693000 }, { "epoch": 0.53, "learning_rate": 4.867915372005131e-05, "loss": 2.5597, "step": 693500 }, { "epoch": 0.53, "learning_rate": 4.867820296787106e-05, "loss": 2.5412, "step": 694000 }, { "epoch": 0.53, "learning_rate": 4.8677250310375815e-05, "loss": 2.5648, "step": 694500 }, { "epoch": 0.53, "learning_rate": 4.8676297652880573e-05, "loss": 2.5592, "step": 695000 }, { "epoch": 0.53, "learning_rate": 4.867534499538533e-05, "loss": 2.5439, "step": 695500 }, { "epoch": 0.53, "learning_rate": 4.867439233789008e-05, "loss": 2.5169, "step": 696000 }, { "epoch": 0.53, "learning_rate": 4.867343968039484e-05, "loss": 2.5515, "step": 696500 }, { "epoch": 0.53, "learning_rate": 4.8672488928214597e-05, "loss": 2.5607, "step": 697000 }, { "epoch": 0.53, "learning_rate": 4.867153627071935e-05, "loss": 2.5531, "step": 697500 }, { "epoch": 0.53, "learning_rate": 4.8670583613224106e-05, "loss": 2.5409, "step": 698000 }, { "epoch": 0.53, "learning_rate": 4.8669630955728865e-05, "loss": 2.5239, "step": 698500 }, { "epoch": 0.53, "learning_rate": 4.866867829823362e-05, "loss": 2.5463, "step": 699000 }, { "epoch": 0.53, "learning_rate": 4.866772754605337e-05, "loss": 2.5302, "step": 699500 }, { "epoch": 0.53, "learning_rate": 4.866677488855813e-05, "loss": 2.5167, "step": 700000 }, { "epoch": 0.53, "learning_rate": 4.866582223106289e-05, "loss": 2.5443, "step": 700500 }, { "epoch": 0.53, "learning_rate": 4.8664869573567646e-05, "loss": 2.5615, "step": 701000 }, { "epoch": 0.53, "learning_rate": 4.8663918821387394e-05, "loss": 2.552, "step": 701500 }, { "epoch": 0.54, "learning_rate": 4.8662968069207136e-05, "loss": 2.5655, "step": 702000 }, { "epoch": 0.54, "learning_rate": 4.86620154117119e-05, "loss": 2.5419, "step": 702500 }, { "epoch": 0.54, "learning_rate": 4.866106275421666e-05, "loss": 2.5475, "step": 703000 }, { "epoch": 0.54, "learning_rate": 4.866011009672141e-05, "loss": 2.5282, "step": 703500 }, { "epoch": 0.54, "learning_rate": 4.865915743922617e-05, "loss": 2.5345, "step": 704000 }, { "epoch": 0.54, "learning_rate": 4.865820478173093e-05, "loss": 2.5373, "step": 704500 }, { "epoch": 0.54, "learning_rate": 4.8657252124235686e-05, "loss": 2.5229, "step": 705000 }, { "epoch": 0.54, "learning_rate": 4.865629946674044e-05, "loss": 2.5327, "step": 705500 }, { "epoch": 0.54, "learning_rate": 4.86553468092452e-05, "loss": 2.526, "step": 706000 }, { "epoch": 0.54, "learning_rate": 4.865439605706495e-05, "loss": 2.5441, "step": 706500 }, { "epoch": 0.54, "learning_rate": 4.86534433995697e-05, "loss": 2.5054, "step": 707000 }, { "epoch": 0.54, "learning_rate": 4.865249074207446e-05, "loss": 2.4997, "step": 707500 }, { "epoch": 0.54, "learning_rate": 4.8651538084579225e-05, "loss": 2.5454, "step": 708000 }, { "epoch": 0.54, "learning_rate": 4.865058542708398e-05, "loss": 2.5189, "step": 708500 }, { "epoch": 0.54, "learning_rate": 4.8649632769588735e-05, "loss": 2.5273, "step": 709000 }, { "epoch": 0.54, "learning_rate": 4.8648680112093493e-05, "loss": 2.4977, "step": 709500 }, { "epoch": 0.54, "learning_rate": 4.864772935991324e-05, "loss": 2.5801, "step": 710000 }, { "epoch": 0.54, "learning_rate": 4.8646776702418e-05, "loss": 2.5558, "step": 710500 }, { "epoch": 0.54, "learning_rate": 4.864582404492276e-05, "loss": 2.5305, "step": 711000 }, { "epoch": 0.54, "learning_rate": 4.8644871387427517e-05, "loss": 2.4888, "step": 711500 }, { "epoch": 0.54, "learning_rate": 4.8643920635247265e-05, "loss": 2.534, "step": 712000 }, { "epoch": 0.54, "learning_rate": 4.8642967977752016e-05, "loss": 2.5377, "step": 712500 }, { "epoch": 0.54, "learning_rate": 4.864201532025678e-05, "loss": 2.5441, "step": 713000 }, { "epoch": 0.54, "learning_rate": 4.864106266276153e-05, "loss": 2.5437, "step": 713500 }, { "epoch": 0.54, "learning_rate": 4.864011000526629e-05, "loss": 2.5334, "step": 714000 }, { "epoch": 0.54, "learning_rate": 4.863915734777105e-05, "loss": 2.5425, "step": 714500 }, { "epoch": 0.54, "learning_rate": 4.863820469027581e-05, "loss": 2.5258, "step": 715000 }, { "epoch": 0.55, "learning_rate": 4.8637252032780566e-05, "loss": 2.5401, "step": 715500 }, { "epoch": 0.55, "learning_rate": 4.8636299375285324e-05, "loss": 2.5349, "step": 716000 }, { "epoch": 0.55, "learning_rate": 4.863534862310507e-05, "loss": 2.5199, "step": 716500 }, { "epoch": 0.55, "learning_rate": 4.863439596560983e-05, "loss": 2.5394, "step": 717000 }, { "epoch": 0.55, "learning_rate": 4.863344330811458e-05, "loss": 2.5368, "step": 717500 }, { "epoch": 0.55, "learning_rate": 4.863249065061935e-05, "loss": 2.5311, "step": 718000 }, { "epoch": 0.55, "learning_rate": 4.86315379931241e-05, "loss": 2.5372, "step": 718500 }, { "epoch": 0.55, "learning_rate": 4.863058533562886e-05, "loss": 2.5266, "step": 719000 }, { "epoch": 0.55, "learning_rate": 4.8629634583448606e-05, "loss": 2.5009, "step": 719500 }, { "epoch": 0.55, "learning_rate": 4.8628681925953364e-05, "loss": 2.5152, "step": 720000 }, { "epoch": 0.55, "eval_accuracy": 0.5331774896893751, "eval_loss": 2.461303949356079, "eval_runtime": 9436.303, "eval_samples_per_second": 29.141, "eval_steps_per_second": 7.285, "step": 720000 }, { "epoch": 0.55, "learning_rate": 4.862772926845812e-05, "loss": 2.5644, "step": 720500 }, { "epoch": 0.55, "learning_rate": 4.862677661096288e-05, "loss": 2.5311, "step": 721000 }, { "epoch": 0.55, "learning_rate": 4.862582395346764e-05, "loss": 2.5312, "step": 721500 }, { "epoch": 0.55, "learning_rate": 4.862487320128739e-05, "loss": 2.5506, "step": 722000 }, { "epoch": 0.55, "learning_rate": 4.8623920543792145e-05, "loss": 2.5207, "step": 722500 }, { "epoch": 0.55, "learning_rate": 4.86229678862969e-05, "loss": 2.5311, "step": 723000 }, { "epoch": 0.55, "learning_rate": 4.862201713411665e-05, "loss": 2.5135, "step": 723500 }, { "epoch": 0.55, "learning_rate": 4.862106447662141e-05, "loss": 2.5328, "step": 724000 }, { "epoch": 0.55, "learning_rate": 4.862011181912616e-05, "loss": 2.5216, "step": 724500 }, { "epoch": 0.55, "learning_rate": 4.861915916163092e-05, "loss": 2.5389, "step": 725000 }, { "epoch": 0.55, "learning_rate": 4.861820650413568e-05, "loss": 2.5443, "step": 725500 }, { "epoch": 0.55, "learning_rate": 4.8617253846640437e-05, "loss": 2.5346, "step": 726000 }, { "epoch": 0.55, "learning_rate": 4.8616301189145195e-05, "loss": 2.4951, "step": 726500 }, { "epoch": 0.55, "learning_rate": 4.861534853164995e-05, "loss": 2.5471, "step": 727000 }, { "epoch": 0.55, "learning_rate": 4.86143977794697e-05, "loss": 2.5386, "step": 727500 }, { "epoch": 0.55, "learning_rate": 4.861344512197445e-05, "loss": 2.5442, "step": 728000 }, { "epoch": 0.56, "learning_rate": 4.861249246447921e-05, "loss": 2.5288, "step": 728500 }, { "epoch": 0.56, "learning_rate": 4.8611539806983976e-05, "loss": 2.5318, "step": 729000 }, { "epoch": 0.56, "learning_rate": 4.861058905480372e-05, "loss": 2.5224, "step": 729500 }, { "epoch": 0.56, "learning_rate": 4.8609636397308476e-05, "loss": 2.5255, "step": 730000 }, { "epoch": 0.56, "learning_rate": 4.860868373981324e-05, "loss": 2.5103, "step": 730500 }, { "epoch": 0.56, "learning_rate": 4.860773108231799e-05, "loss": 2.5233, "step": 731000 }, { "epoch": 0.56, "learning_rate": 4.860677842482275e-05, "loss": 2.5143, "step": 731500 }, { "epoch": 0.56, "learning_rate": 4.860582576732751e-05, "loss": 2.4888, "step": 732000 }, { "epoch": 0.56, "learning_rate": 4.860487310983227e-05, "loss": 2.5293, "step": 732500 }, { "epoch": 0.56, "learning_rate": 4.8603920452337026e-05, "loss": 2.5312, "step": 733000 }, { "epoch": 0.56, "learning_rate": 4.860296779484178e-05, "loss": 2.5259, "step": 733500 }, { "epoch": 0.56, "learning_rate": 4.860201513734654e-05, "loss": 2.533, "step": 734000 }, { "epoch": 0.56, "learning_rate": 4.860106438516629e-05, "loss": 2.529, "step": 734500 }, { "epoch": 0.56, "learning_rate": 4.860011172767104e-05, "loss": 2.5312, "step": 735000 }, { "epoch": 0.56, "learning_rate": 4.85991590701758e-05, "loss": 2.5207, "step": 735500 }, { "epoch": 0.56, "learning_rate": 4.859820641268056e-05, "loss": 2.5198, "step": 736000 }, { "epoch": 0.56, "learning_rate": 4.859725375518532e-05, "loss": 2.4997, "step": 736500 }, { "epoch": 0.56, "learning_rate": 4.8596303003005065e-05, "loss": 2.5366, "step": 737000 }, { "epoch": 0.56, "learning_rate": 4.8595350345509824e-05, "loss": 2.5173, "step": 737500 }, { "epoch": 0.56, "learning_rate": 4.859439768801458e-05, "loss": 2.5266, "step": 738000 }, { "epoch": 0.56, "learning_rate": 4.8593445030519333e-05, "loss": 2.5798, "step": 738500 }, { "epoch": 0.56, "learning_rate": 4.85924923730241e-05, "loss": 2.5424, "step": 739000 }, { "epoch": 0.56, "learning_rate": 4.859154162084385e-05, "loss": 2.5759, "step": 739500 }, { "epoch": 0.56, "learning_rate": 4.85905889633486e-05, "loss": 2.5127, "step": 740000 }, { "epoch": 0.56, "learning_rate": 4.8589636305853357e-05, "loss": 2.5299, "step": 740500 }, { "epoch": 0.56, "learning_rate": 4.858868364835812e-05, "loss": 2.5405, "step": 741000 }, { "epoch": 0.57, "learning_rate": 4.858773099086287e-05, "loss": 2.5254, "step": 741500 }, { "epoch": 0.57, "learning_rate": 4.858677833336763e-05, "loss": 2.5503, "step": 742000 }, { "epoch": 0.57, "learning_rate": 4.858582567587239e-05, "loss": 2.5077, "step": 742500 }, { "epoch": 0.57, "learning_rate": 4.858487492369214e-05, "loss": 2.5035, "step": 743000 }, { "epoch": 0.57, "learning_rate": 4.8583922266196896e-05, "loss": 2.5126, "step": 743500 }, { "epoch": 0.57, "learning_rate": 4.858296960870165e-05, "loss": 2.5186, "step": 744000 }, { "epoch": 0.57, "learning_rate": 4.858201695120641e-05, "loss": 2.5173, "step": 744500 }, { "epoch": 0.57, "learning_rate": 4.8581064293711164e-05, "loss": 2.5233, "step": 745000 }, { "epoch": 0.57, "learning_rate": 4.858011163621592e-05, "loss": 2.5409, "step": 745500 }, { "epoch": 0.57, "learning_rate": 4.857915897872069e-05, "loss": 2.5061, "step": 746000 }, { "epoch": 0.57, "learning_rate": 4.857820822654043e-05, "loss": 2.5157, "step": 746500 }, { "epoch": 0.57, "learning_rate": 4.857725556904519e-05, "loss": 2.5046, "step": 747000 }, { "epoch": 0.57, "learning_rate": 4.8576302911549946e-05, "loss": 2.5362, "step": 747500 }, { "epoch": 0.57, "learning_rate": 4.8575350254054704e-05, "loss": 2.529, "step": 748000 }, { "epoch": 0.57, "learning_rate": 4.857439759655946e-05, "loss": 2.5313, "step": 748500 }, { "epoch": 0.57, "learning_rate": 4.8573444939064214e-05, "loss": 2.5468, "step": 749000 }, { "epoch": 0.57, "learning_rate": 4.857249228156898e-05, "loss": 2.5012, "step": 749500 }, { "epoch": 0.57, "learning_rate": 4.857154152938873e-05, "loss": 2.501, "step": 750000 }, { "epoch": 0.57, "eval_accuracy": 0.5342951817565138, "eval_loss": 2.4543516635894775, "eval_runtime": 9417.671, "eval_samples_per_second": 29.199, "eval_steps_per_second": 7.3, "step": 750000 }, { "epoch": 0.57, "learning_rate": 4.857058887189348e-05, "loss": 2.4986, "step": 750500 }, { "epoch": 0.57, "learning_rate": 4.856963621439824e-05, "loss": 2.5288, "step": 751000 }, { "epoch": 0.57, "learning_rate": 4.8568683556902995e-05, "loss": 2.5196, "step": 751500 }, { "epoch": 0.57, "learning_rate": 4.8567732804722744e-05, "loss": 2.4986, "step": 752000 }, { "epoch": 0.57, "learning_rate": 4.85667801472275e-05, "loss": 2.4932, "step": 752500 }, { "epoch": 0.57, "learning_rate": 4.856582748973226e-05, "loss": 2.5125, "step": 753000 }, { "epoch": 0.57, "learning_rate": 4.856487483223702e-05, "loss": 2.5352, "step": 753500 }, { "epoch": 0.57, "learning_rate": 4.856392217474178e-05, "loss": 2.5109, "step": 754000 }, { "epoch": 0.58, "learning_rate": 4.856297142256152e-05, "loss": 2.5321, "step": 754500 }, { "epoch": 0.58, "learning_rate": 4.856201876506628e-05, "loss": 2.5216, "step": 755000 }, { "epoch": 0.58, "learning_rate": 4.856106610757104e-05, "loss": 2.5253, "step": 755500 }, { "epoch": 0.58, "learning_rate": 4.856011345007579e-05, "loss": 2.5162, "step": 756000 }, { "epoch": 0.58, "learning_rate": 4.855916079258055e-05, "loss": 2.5036, "step": 756500 }, { "epoch": 0.58, "learning_rate": 4.855820813508531e-05, "loss": 2.556, "step": 757000 }, { "epoch": 0.58, "learning_rate": 4.855725738290506e-05, "loss": 2.542, "step": 757500 }, { "epoch": 0.58, "learning_rate": 4.8556304725409816e-05, "loss": 2.5479, "step": 758000 }, { "epoch": 0.58, "learning_rate": 4.8555352067914575e-05, "loss": 2.548, "step": 758500 }, { "epoch": 0.58, "learning_rate": 4.855440131573432e-05, "loss": 2.5394, "step": 759000 }, { "epoch": 0.58, "learning_rate": 4.855344865823908e-05, "loss": 2.5406, "step": 759500 }, { "epoch": 0.58, "learning_rate": 4.855249600074383e-05, "loss": 2.5179, "step": 760000 }, { "epoch": 0.58, "learning_rate": 4.85515433432486e-05, "loss": 2.5343, "step": 760500 }, { "epoch": 0.58, "learning_rate": 4.855059068575335e-05, "loss": 2.4971, "step": 761000 }, { "epoch": 0.58, "learning_rate": 4.854963802825811e-05, "loss": 2.5338, "step": 761500 }, { "epoch": 0.58, "learning_rate": 4.854868537076287e-05, "loss": 2.5017, "step": 762000 }, { "epoch": 0.58, "learning_rate": 4.8547732713267624e-05, "loss": 2.5356, "step": 762500 }, { "epoch": 0.58, "learning_rate": 4.854678005577238e-05, "loss": 2.5034, "step": 763000 }, { "epoch": 0.58, "learning_rate": 4.854582739827714e-05, "loss": 2.5402, "step": 763500 }, { "epoch": 0.58, "learning_rate": 4.85448747407819e-05, "loss": 2.5489, "step": 764000 }, { "epoch": 0.58, "learning_rate": 4.854392398860165e-05, "loss": 2.5176, "step": 764500 }, { "epoch": 0.58, "learning_rate": 4.85429713311064e-05, "loss": 2.5373, "step": 765000 }, { "epoch": 0.58, "learning_rate": 4.8542018673611164e-05, "loss": 2.5225, "step": 765500 }, { "epoch": 0.58, "learning_rate": 4.854106601611592e-05, "loss": 2.5459, "step": 766000 }, { "epoch": 0.58, "learning_rate": 4.8540113358620674e-05, "loss": 2.5122, "step": 766500 }, { "epoch": 0.58, "learning_rate": 4.853916070112544e-05, "loss": 2.5329, "step": 767000 }, { "epoch": 0.58, "learning_rate": 4.853820804363019e-05, "loss": 2.5147, "step": 767500 }, { "epoch": 0.59, "learning_rate": 4.853725538613495e-05, "loss": 2.5107, "step": 768000 }, { "epoch": 0.59, "learning_rate": 4.85363046339547e-05, "loss": 2.5306, "step": 768500 }, { "epoch": 0.59, "learning_rate": 4.8535351976459455e-05, "loss": 2.5514, "step": 769000 }, { "epoch": 0.59, "learning_rate": 4.853439931896421e-05, "loss": 2.5209, "step": 769500 }, { "epoch": 0.59, "learning_rate": 4.8533446661468965e-05, "loss": 2.5557, "step": 770000 }, { "epoch": 0.59, "learning_rate": 4.853249400397373e-05, "loss": 2.5025, "step": 770500 }, { "epoch": 0.59, "learning_rate": 4.853154325179348e-05, "loss": 2.5243, "step": 771000 }, { "epoch": 0.59, "learning_rate": 4.8530592499613226e-05, "loss": 2.5324, "step": 771500 }, { "epoch": 0.59, "learning_rate": 4.852963984211798e-05, "loss": 2.5651, "step": 772000 }, { "epoch": 0.59, "learning_rate": 4.852868718462274e-05, "loss": 2.5142, "step": 772500 }, { "epoch": 0.59, "learning_rate": 4.852773643244249e-05, "loss": 2.5296, "step": 773000 }, { "epoch": 0.59, "learning_rate": 4.852678377494724e-05, "loss": 2.5006, "step": 773500 }, { "epoch": 0.59, "learning_rate": 4.8525831117452e-05, "loss": 2.5112, "step": 774000 }, { "epoch": 0.59, "learning_rate": 4.852487845995676e-05, "loss": 2.5575, "step": 774500 }, { "epoch": 0.59, "learning_rate": 4.852392580246152e-05, "loss": 2.5241, "step": 775000 }, { "epoch": 0.59, "learning_rate": 4.8522975050281266e-05, "loss": 2.5181, "step": 775500 }, { "epoch": 0.59, "learning_rate": 4.852202239278602e-05, "loss": 2.4925, "step": 776000 }, { "epoch": 0.59, "learning_rate": 4.852106973529078e-05, "loss": 2.5104, "step": 776500 }, { "epoch": 0.59, "learning_rate": 4.8520117077795534e-05, "loss": 2.5167, "step": 777000 }, { "epoch": 0.59, "learning_rate": 4.851916442030029e-05, "loss": 2.5447, "step": 777500 }, { "epoch": 0.59, "learning_rate": 4.851821176280506e-05, "loss": 2.5358, "step": 778000 }, { "epoch": 0.59, "learning_rate": 4.851725910530981e-05, "loss": 2.5225, "step": 778500 }, { "epoch": 0.59, "learning_rate": 4.851630644781457e-05, "loss": 2.5213, "step": 779000 }, { "epoch": 0.59, "learning_rate": 4.8515353790319325e-05, "loss": 2.5208, "step": 779500 }, { "epoch": 0.59, "learning_rate": 4.8514403038139074e-05, "loss": 2.5117, "step": 780000 }, { "epoch": 0.59, "eval_accuracy": 0.5350785554939919, "eval_loss": 2.4482388496398926, "eval_runtime": 9410.4392, "eval_samples_per_second": 29.222, "eval_steps_per_second": 7.305, "step": 780000 }, { "epoch": 0.59, "learning_rate": 4.851345228595882e-05, "loss": 2.4966, "step": 780500 }, { "epoch": 0.6, "learning_rate": 4.851249962846358e-05, "loss": 2.5239, "step": 781000 }, { "epoch": 0.6, "learning_rate": 4.851154697096834e-05, "loss": 2.5053, "step": 781500 }, { "epoch": 0.6, "learning_rate": 4.85105943134731e-05, "loss": 2.5289, "step": 782000 }, { "epoch": 0.6, "learning_rate": 4.850964165597785e-05, "loss": 2.4993, "step": 782500 }, { "epoch": 0.6, "learning_rate": 4.850868899848261e-05, "loss": 2.4985, "step": 783000 }, { "epoch": 0.6, "learning_rate": 4.850773824630236e-05, "loss": 2.5329, "step": 783500 }, { "epoch": 0.6, "learning_rate": 4.850678558880711e-05, "loss": 2.4752, "step": 784000 }, { "epoch": 0.6, "learning_rate": 4.850583293131187e-05, "loss": 2.5238, "step": 784500 }, { "epoch": 0.6, "learning_rate": 4.850488027381663e-05, "loss": 2.525, "step": 785000 }, { "epoch": 0.6, "learning_rate": 4.850392761632139e-05, "loss": 2.5306, "step": 785500 }, { "epoch": 0.6, "learning_rate": 4.8502974958826146e-05, "loss": 2.5225, "step": 786000 }, { "epoch": 0.6, "learning_rate": 4.8502022301330905e-05, "loss": 2.5329, "step": 786500 }, { "epoch": 0.6, "learning_rate": 4.850106964383566e-05, "loss": 2.5193, "step": 787000 }, { "epoch": 0.6, "learning_rate": 4.8500116986340415e-05, "loss": 2.5076, "step": 787500 }, { "epoch": 0.6, "learning_rate": 4.849916432884517e-05, "loss": 2.5274, "step": 788000 }, { "epoch": 0.6, "learning_rate": 4.849821167134994e-05, "loss": 2.5147, "step": 788500 }, { "epoch": 0.6, "learning_rate": 4.849725901385469e-05, "loss": 2.5143, "step": 789000 }, { "epoch": 0.6, "learning_rate": 4.849630826167444e-05, "loss": 2.52, "step": 789500 }, { "epoch": 0.6, "learning_rate": 4.8495355604179196e-05, "loss": 2.518, "step": 790000 }, { "epoch": 0.6, "learning_rate": 4.8494402946683954e-05, "loss": 2.5346, "step": 790500 }, { "epoch": 0.6, "learning_rate": 4.84934521945037e-05, "loss": 2.509, "step": 791000 }, { "epoch": 0.6, "learning_rate": 4.849249953700846e-05, "loss": 2.5212, "step": 791500 }, { "epoch": 0.6, "learning_rate": 4.849154687951322e-05, "loss": 2.5395, "step": 792000 }, { "epoch": 0.6, "learning_rate": 4.849059422201798e-05, "loss": 2.5189, "step": 792500 }, { "epoch": 0.6, "learning_rate": 4.848964156452273e-05, "loss": 2.5435, "step": 793000 }, { "epoch": 0.6, "learning_rate": 4.848869081234248e-05, "loss": 2.5293, "step": 793500 }, { "epoch": 0.61, "learning_rate": 4.848774006016223e-05, "loss": 2.4901, "step": 794000 }, { "epoch": 0.61, "learning_rate": 4.8486787402666984e-05, "loss": 2.5383, "step": 794500 }, { "epoch": 0.61, "learning_rate": 4.848583474517174e-05, "loss": 2.5224, "step": 795000 }, { "epoch": 0.61, "learning_rate": 4.84848820876765e-05, "loss": 2.5309, "step": 795500 }, { "epoch": 0.61, "learning_rate": 4.848392943018126e-05, "loss": 2.533, "step": 796000 }, { "epoch": 0.61, "learning_rate": 4.848297677268602e-05, "loss": 2.4852, "step": 796500 }, { "epoch": 0.61, "learning_rate": 4.848202411519077e-05, "loss": 2.5018, "step": 797000 }, { "epoch": 0.61, "learning_rate": 4.8481071457695533e-05, "loss": 2.5226, "step": 797500 }, { "epoch": 0.61, "learning_rate": 4.848012070551528e-05, "loss": 2.525, "step": 798000 }, { "epoch": 0.61, "learning_rate": 4.847916804802003e-05, "loss": 2.4942, "step": 798500 }, { "epoch": 0.61, "learning_rate": 4.847821729583978e-05, "loss": 2.511, "step": 799000 }, { "epoch": 0.61, "learning_rate": 4.8477264638344547e-05, "loss": 2.4991, "step": 799500 }, { "epoch": 0.61, "learning_rate": 4.84763119808493e-05, "loss": 2.568, "step": 800000 }, { "epoch": 0.61, "learning_rate": 4.8475359323354056e-05, "loss": 2.5072, "step": 800500 }, { "epoch": 0.61, "learning_rate": 4.8474406665858815e-05, "loss": 2.5354, "step": 801000 }, { "epoch": 0.61, "learning_rate": 4.847345400836357e-05, "loss": 2.5272, "step": 801500 }, { "epoch": 0.61, "learning_rate": 4.847250325618332e-05, "loss": 2.5244, "step": 802000 }, { "epoch": 0.61, "learning_rate": 4.847155059868808e-05, "loss": 2.5137, "step": 802500 }, { "epoch": 0.61, "learning_rate": 4.847059794119284e-05, "loss": 2.5394, "step": 803000 }, { "epoch": 0.61, "learning_rate": 4.8469645283697596e-05, "loss": 2.4671, "step": 803500 }, { "epoch": 0.61, "learning_rate": 4.846869453151734e-05, "loss": 2.4784, "step": 804000 }, { "epoch": 0.61, "learning_rate": 4.8467741874022096e-05, "loss": 2.5371, "step": 804500 }, { "epoch": 0.61, "learning_rate": 4.846678921652686e-05, "loss": 2.5387, "step": 805000 }, { "epoch": 0.61, "learning_rate": 4.846583655903161e-05, "loss": 2.4954, "step": 805500 }, { "epoch": 0.61, "learning_rate": 4.846488580685136e-05, "loss": 2.5118, "step": 806000 }, { "epoch": 0.61, "learning_rate": 4.846393314935612e-05, "loss": 2.5114, "step": 806500 }, { "epoch": 0.62, "learning_rate": 4.846298049186088e-05, "loss": 2.5082, "step": 807000 }, { "epoch": 0.62, "learning_rate": 4.8462027834365636e-05, "loss": 2.5428, "step": 807500 }, { "epoch": 0.62, "learning_rate": 4.8461075176870394e-05, "loss": 2.5115, "step": 808000 }, { "epoch": 0.62, "learning_rate": 4.846012251937515e-05, "loss": 2.5144, "step": 808500 }, { "epoch": 0.62, "learning_rate": 4.845916986187991e-05, "loss": 2.4977, "step": 809000 }, { "epoch": 0.62, "learning_rate": 4.845821720438466e-05, "loss": 2.5214, "step": 809500 }, { "epoch": 0.62, "learning_rate": 4.845726454688943e-05, "loss": 2.533, "step": 810000 }, { "epoch": 0.62, "eval_accuracy": 0.5359159402971051, "eval_loss": 2.441509962081909, "eval_runtime": 9410.4578, "eval_samples_per_second": 29.222, "eval_steps_per_second": 7.305, "step": 810000 }, { "epoch": 0.62, "learning_rate": 4.845631188939418e-05, "loss": 2.5166, "step": 810500 }, { "epoch": 0.62, "learning_rate": 4.845535923189894e-05, "loss": 2.5196, "step": 811000 }, { "epoch": 0.62, "learning_rate": 4.8454406574403695e-05, "loss": 2.5294, "step": 811500 }, { "epoch": 0.62, "learning_rate": 4.8453453916908453e-05, "loss": 2.5061, "step": 812000 }, { "epoch": 0.62, "learning_rate": 4.84525031647282e-05, "loss": 2.4878, "step": 812500 }, { "epoch": 0.62, "learning_rate": 4.845155050723296e-05, "loss": 2.5158, "step": 813000 }, { "epoch": 0.62, "learning_rate": 4.845059975505271e-05, "loss": 2.4952, "step": 813500 }, { "epoch": 0.62, "learning_rate": 4.8449647097557467e-05, "loss": 2.5196, "step": 814000 }, { "epoch": 0.62, "learning_rate": 4.844869444006222e-05, "loss": 2.5308, "step": 814500 }, { "epoch": 0.62, "learning_rate": 4.844774178256698e-05, "loss": 2.53, "step": 815000 }, { "epoch": 0.62, "learning_rate": 4.844679103038673e-05, "loss": 2.4882, "step": 815500 }, { "epoch": 0.62, "learning_rate": 4.844583837289148e-05, "loss": 2.5313, "step": 816000 }, { "epoch": 0.62, "learning_rate": 4.844488571539624e-05, "loss": 2.4912, "step": 816500 }, { "epoch": 0.62, "learning_rate": 4.8443933057901006e-05, "loss": 2.4797, "step": 817000 }, { "epoch": 0.62, "learning_rate": 4.844298040040576e-05, "loss": 2.4845, "step": 817500 }, { "epoch": 0.62, "learning_rate": 4.8442027742910516e-05, "loss": 2.5227, "step": 818000 }, { "epoch": 0.62, "learning_rate": 4.8441075085415274e-05, "loss": 2.5142, "step": 818500 }, { "epoch": 0.62, "learning_rate": 4.844012433323502e-05, "loss": 2.5353, "step": 819000 }, { "epoch": 0.62, "learning_rate": 4.843917167573978e-05, "loss": 2.5149, "step": 819500 }, { "epoch": 0.62, "learning_rate": 4.843821901824453e-05, "loss": 2.4849, "step": 820000 }, { "epoch": 0.63, "learning_rate": 4.84372663607493e-05, "loss": 2.4997, "step": 820500 }, { "epoch": 0.63, "learning_rate": 4.843631370325405e-05, "loss": 2.5269, "step": 821000 }, { "epoch": 0.63, "learning_rate": 4.843536104575881e-05, "loss": 2.5204, "step": 821500 }, { "epoch": 0.63, "learning_rate": 4.843440838826357e-05, "loss": 2.4866, "step": 822000 }, { "epoch": 0.63, "learning_rate": 4.8433455730768324e-05, "loss": 2.5257, "step": 822500 }, { "epoch": 0.63, "learning_rate": 4.843250307327308e-05, "loss": 2.5165, "step": 823000 }, { "epoch": 0.63, "learning_rate": 4.843155041577784e-05, "loss": 2.5304, "step": 823500 }, { "epoch": 0.63, "learning_rate": 4.84305977582826e-05, "loss": 2.5032, "step": 824000 }, { "epoch": 0.63, "learning_rate": 4.842964700610235e-05, "loss": 2.5299, "step": 824500 }, { "epoch": 0.63, "learning_rate": 4.84286943486071e-05, "loss": 2.5513, "step": 825000 }, { "epoch": 0.63, "learning_rate": 4.8427741691111864e-05, "loss": 2.5273, "step": 825500 }, { "epoch": 0.63, "learning_rate": 4.8426789033616615e-05, "loss": 2.4934, "step": 826000 }, { "epoch": 0.63, "learning_rate": 4.8425836376121373e-05, "loss": 2.4948, "step": 826500 }, { "epoch": 0.63, "learning_rate": 4.842488371862613e-05, "loss": 2.4867, "step": 827000 }, { "epoch": 0.63, "learning_rate": 4.842393296644588e-05, "loss": 2.4848, "step": 827500 }, { "epoch": 0.63, "learning_rate": 4.842298030895064e-05, "loss": 2.504, "step": 828000 }, { "epoch": 0.63, "learning_rate": 4.8422027651455397e-05, "loss": 2.52, "step": 828500 }, { "epoch": 0.63, "learning_rate": 4.8421074993960155e-05, "loss": 2.4617, "step": 829000 }, { "epoch": 0.63, "learning_rate": 4.842012233646491e-05, "loss": 2.5031, "step": 829500 }, { "epoch": 0.63, "learning_rate": 4.841917158428466e-05, "loss": 2.5218, "step": 830000 }, { "epoch": 0.63, "learning_rate": 4.84182208321044e-05, "loss": 2.4915, "step": 830500 }, { "epoch": 0.63, "learning_rate": 4.841726817460917e-05, "loss": 2.5277, "step": 831000 }, { "epoch": 0.63, "learning_rate": 4.8416317422428916e-05, "loss": 2.5073, "step": 831500 }, { "epoch": 0.63, "learning_rate": 4.841536476493367e-05, "loss": 2.4946, "step": 832000 }, { "epoch": 0.63, "learning_rate": 4.8414412107438426e-05, "loss": 2.4955, "step": 832500 }, { "epoch": 0.63, "learning_rate": 4.841345944994319e-05, "loss": 2.4991, "step": 833000 }, { "epoch": 0.64, "learning_rate": 4.841250679244794e-05, "loss": 2.5253, "step": 833500 }, { "epoch": 0.64, "learning_rate": 4.84115541349527e-05, "loss": 2.5169, "step": 834000 }, { "epoch": 0.64, "learning_rate": 4.841060147745746e-05, "loss": 2.5177, "step": 834500 }, { "epoch": 0.64, "learning_rate": 4.840964881996222e-05, "loss": 2.502, "step": 835000 }, { "epoch": 0.64, "learning_rate": 4.840869616246697e-05, "loss": 2.5051, "step": 835500 }, { "epoch": 0.64, "learning_rate": 4.8407743504971734e-05, "loss": 2.5146, "step": 836000 }, { "epoch": 0.64, "learning_rate": 4.840679084747649e-05, "loss": 2.5162, "step": 836500 }, { "epoch": 0.64, "learning_rate": 4.8405838189981244e-05, "loss": 2.4826, "step": 837000 }, { "epoch": 0.64, "learning_rate": 4.8404885532486e-05, "loss": 2.5228, "step": 837500 }, { "epoch": 0.64, "learning_rate": 4.840393478030576e-05, "loss": 2.493, "step": 838000 }, { "epoch": 0.64, "learning_rate": 4.840298212281051e-05, "loss": 2.4975, "step": 838500 }, { "epoch": 0.64, "learning_rate": 4.840203137063026e-05, "loss": 2.4956, "step": 839000 }, { "epoch": 0.64, "learning_rate": 4.8401078713135015e-05, "loss": 2.4693, "step": 839500 }, { "epoch": 0.64, "learning_rate": 4.8400126055639774e-05, "loss": 2.5084, "step": 840000 }, { "epoch": 0.64, "eval_accuracy": 0.5369741576282202, "eval_loss": 2.4342472553253174, "eval_runtime": 9412.8058, "eval_samples_per_second": 29.214, "eval_steps_per_second": 7.304, "step": 840000 }, { "epoch": 0.64, "learning_rate": 4.839917339814453e-05, "loss": 2.5141, "step": 840500 }, { "epoch": 0.64, "learning_rate": 4.8398220740649283e-05, "loss": 2.4913, "step": 841000 }, { "epoch": 0.64, "learning_rate": 4.839726808315405e-05, "loss": 2.4952, "step": 841500 }, { "epoch": 0.64, "learning_rate": 4.839631542565881e-05, "loss": 2.506, "step": 842000 }, { "epoch": 0.64, "learning_rate": 4.839536276816356e-05, "loss": 2.5072, "step": 842500 }, { "epoch": 0.64, "learning_rate": 4.839441011066832e-05, "loss": 2.4864, "step": 843000 }, { "epoch": 0.64, "learning_rate": 4.8393457453173075e-05, "loss": 2.5058, "step": 843500 }, { "epoch": 0.64, "learning_rate": 4.839250479567783e-05, "loss": 2.4961, "step": 844000 }, { "epoch": 0.64, "learning_rate": 4.839155213818259e-05, "loss": 2.4822, "step": 844500 }, { "epoch": 0.64, "learning_rate": 4.839059948068735e-05, "loss": 2.5, "step": 845000 }, { "epoch": 0.64, "learning_rate": 4.838965063382209e-05, "loss": 2.5315, "step": 845500 }, { "epoch": 0.64, "learning_rate": 4.8388697976326846e-05, "loss": 2.4885, "step": 846000 }, { "epoch": 0.65, "learning_rate": 4.83877453188316e-05, "loss": 2.5117, "step": 846500 }, { "epoch": 0.65, "learning_rate": 4.838679266133636e-05, "loss": 2.4866, "step": 847000 }, { "epoch": 0.65, "learning_rate": 4.8385840003841114e-05, "loss": 2.506, "step": 847500 }, { "epoch": 0.65, "learning_rate": 4.838488734634587e-05, "loss": 2.5338, "step": 848000 }, { "epoch": 0.65, "learning_rate": 4.838393468885064e-05, "loss": 2.5288, "step": 848500 }, { "epoch": 0.65, "learning_rate": 4.838298203135539e-05, "loss": 2.4938, "step": 849000 }, { "epoch": 0.65, "learning_rate": 4.838203127917514e-05, "loss": 2.5137, "step": 849500 }, { "epoch": 0.65, "learning_rate": 4.8381078621679896e-05, "loss": 2.4738, "step": 850000 }, { "epoch": 0.65, "learning_rate": 4.8380125964184654e-05, "loss": 2.4973, "step": 850500 }, { "epoch": 0.65, "learning_rate": 4.837917330668941e-05, "loss": 2.5173, "step": 851000 }, { "epoch": 0.65, "learning_rate": 4.8378220649194164e-05, "loss": 2.4681, "step": 851500 }, { "epoch": 0.65, "learning_rate": 4.837726799169893e-05, "loss": 2.5334, "step": 852000 }, { "epoch": 0.65, "learning_rate": 4.837631723951868e-05, "loss": 2.5, "step": 852500 }, { "epoch": 0.65, "learning_rate": 4.837536458202343e-05, "loss": 2.526, "step": 853000 }, { "epoch": 0.65, "learning_rate": 4.837441192452819e-05, "loss": 2.5256, "step": 853500 }, { "epoch": 0.65, "learning_rate": 4.8373459267032945e-05, "loss": 2.487, "step": 854000 }, { "epoch": 0.65, "learning_rate": 4.8372506609537704e-05, "loss": 2.4844, "step": 854500 }, { "epoch": 0.65, "learning_rate": 4.837155585735745e-05, "loss": 2.5446, "step": 855000 }, { "epoch": 0.65, "learning_rate": 4.83706051051772e-05, "loss": 2.4951, "step": 855500 }, { "epoch": 0.65, "learning_rate": 4.836965244768196e-05, "loss": 2.511, "step": 856000 }, { "epoch": 0.65, "learning_rate": 4.836869979018672e-05, "loss": 2.4738, "step": 856500 }, { "epoch": 0.65, "learning_rate": 4.836774713269147e-05, "loss": 2.4937, "step": 857000 }, { "epoch": 0.65, "learning_rate": 4.836679447519623e-05, "loss": 2.52, "step": 857500 }, { "epoch": 0.65, "learning_rate": 4.836584181770099e-05, "loss": 2.483, "step": 858000 }, { "epoch": 0.65, "learning_rate": 4.836488916020574e-05, "loss": 2.5091, "step": 858500 }, { "epoch": 0.65, "learning_rate": 4.836393650271051e-05, "loss": 2.4984, "step": 859000 }, { "epoch": 0.66, "learning_rate": 4.836298384521526e-05, "loss": 2.5226, "step": 859500 }, { "epoch": 0.66, "learning_rate": 4.836203309303501e-05, "loss": 2.4624, "step": 860000 }, { "epoch": 0.66, "learning_rate": 4.8361082340854756e-05, "loss": 2.4854, "step": 860500 }, { "epoch": 0.66, "learning_rate": 4.8360129683359515e-05, "loss": 2.5103, "step": 861000 }, { "epoch": 0.66, "learning_rate": 4.835917702586427e-05, "loss": 2.5037, "step": 861500 }, { "epoch": 0.66, "learning_rate": 4.835822436836903e-05, "loss": 2.4906, "step": 862000 }, { "epoch": 0.66, "learning_rate": 4.835727171087379e-05, "loss": 2.5154, "step": 862500 }, { "epoch": 0.66, "learning_rate": 4.835632095869354e-05, "loss": 2.4859, "step": 863000 }, { "epoch": 0.66, "learning_rate": 4.8355368301198296e-05, "loss": 2.5066, "step": 863500 }, { "epoch": 0.66, "learning_rate": 4.835441564370305e-05, "loss": 2.4989, "step": 864000 }, { "epoch": 0.66, "learning_rate": 4.835346298620781e-05, "loss": 2.4893, "step": 864500 }, { "epoch": 0.66, "learning_rate": 4.8352510328712564e-05, "loss": 2.4604, "step": 865000 }, { "epoch": 0.66, "learning_rate": 4.835155767121732e-05, "loss": 2.4778, "step": 865500 }, { "epoch": 0.66, "learning_rate": 4.835060501372208e-05, "loss": 2.4728, "step": 866000 }, { "epoch": 0.66, "learning_rate": 4.834965235622684e-05, "loss": 2.5163, "step": 866500 }, { "epoch": 0.66, "learning_rate": 4.83486996987316e-05, "loss": 2.4842, "step": 867000 }, { "epoch": 0.66, "learning_rate": 4.834774704123635e-05, "loss": 2.4792, "step": 867500 }, { "epoch": 0.66, "learning_rate": 4.8346794383741114e-05, "loss": 2.5151, "step": 868000 }, { "epoch": 0.66, "learning_rate": 4.8345841726245865e-05, "loss": 2.5082, "step": 868500 }, { "epoch": 0.66, "learning_rate": 4.8344889068750624e-05, "loss": 2.4805, "step": 869000 }, { "epoch": 0.66, "learning_rate": 4.834393831657037e-05, "loss": 2.4789, "step": 869500 }, { "epoch": 0.66, "learning_rate": 4.834298565907513e-05, "loss": 2.494, "step": 870000 }, { "epoch": 0.66, "eval_accuracy": 0.5376838909000078, "eval_loss": 2.4287471771240234, "eval_runtime": 9411.4474, "eval_samples_per_second": 29.218, "eval_steps_per_second": 7.305, "step": 870000 }, { "epoch": 0.66, "learning_rate": 4.834203300157989e-05, "loss": 2.5188, "step": 870500 }, { "epoch": 0.66, "learning_rate": 4.834108224939964e-05, "loss": 2.4801, "step": 871000 }, { "epoch": 0.66, "learning_rate": 4.8340129591904395e-05, "loss": 2.4803, "step": 871500 }, { "epoch": 0.66, "learning_rate": 4.833917693440915e-05, "loss": 2.4882, "step": 872000 }, { "epoch": 0.66, "learning_rate": 4.833822427691391e-05, "loss": 2.4573, "step": 872500 }, { "epoch": 0.67, "learning_rate": 4.833727352473365e-05, "loss": 2.4912, "step": 873000 }, { "epoch": 0.67, "learning_rate": 4.833632086723842e-05, "loss": 2.4955, "step": 873500 }, { "epoch": 0.67, "learning_rate": 4.8335368209743176e-05, "loss": 2.5414, "step": 874000 }, { "epoch": 0.67, "learning_rate": 4.833441555224793e-05, "loss": 2.4965, "step": 874500 }, { "epoch": 0.67, "learning_rate": 4.8333464800067676e-05, "loss": 2.4903, "step": 875000 }, { "epoch": 0.67, "learning_rate": 4.833251214257244e-05, "loss": 2.4883, "step": 875500 }, { "epoch": 0.67, "learning_rate": 4.833155948507719e-05, "loss": 2.4873, "step": 876000 }, { "epoch": 0.67, "learning_rate": 4.833060873289694e-05, "loss": 2.4918, "step": 876500 }, { "epoch": 0.67, "learning_rate": 4.83296560754017e-05, "loss": 2.4747, "step": 877000 }, { "epoch": 0.67, "learning_rate": 4.832870341790646e-05, "loss": 2.4503, "step": 877500 }, { "epoch": 0.67, "learning_rate": 4.8327750760411216e-05, "loss": 2.4926, "step": 878000 }, { "epoch": 0.67, "learning_rate": 4.8326800008230964e-05, "loss": 2.4869, "step": 878500 }, { "epoch": 0.67, "learning_rate": 4.832584735073572e-05, "loss": 2.4935, "step": 879000 }, { "epoch": 0.67, "learning_rate": 4.832489469324048e-05, "loss": 2.4595, "step": 879500 }, { "epoch": 0.67, "learning_rate": 4.832394203574523e-05, "loss": 2.5026, "step": 880000 }, { "epoch": 0.67, "learning_rate": 4.832298937825e-05, "loss": 2.4938, "step": 880500 }, { "epoch": 0.67, "learning_rate": 4.832203672075475e-05, "loss": 2.5015, "step": 881000 }, { "epoch": 0.67, "learning_rate": 4.83210859685745e-05, "loss": 2.5084, "step": 881500 }, { "epoch": 0.67, "learning_rate": 4.8320133311079255e-05, "loss": 2.5152, "step": 882000 }, { "epoch": 0.67, "learning_rate": 4.8319180653584014e-05, "loss": 2.477, "step": 882500 }, { "epoch": 0.67, "learning_rate": 4.831822799608877e-05, "loss": 2.4759, "step": 883000 }, { "epoch": 0.67, "learning_rate": 4.831727533859353e-05, "loss": 2.5024, "step": 883500 }, { "epoch": 0.67, "learning_rate": 4.831632268109829e-05, "loss": 2.508, "step": 884000 }, { "epoch": 0.67, "learning_rate": 4.831537002360305e-05, "loss": 2.4888, "step": 884500 }, { "epoch": 0.67, "learning_rate": 4.83144173661078e-05, "loss": 2.4926, "step": 885000 }, { "epoch": 0.67, "learning_rate": 4.8313464708612564e-05, "loss": 2.4664, "step": 885500 }, { "epoch": 0.68, "learning_rate": 4.8312512051117315e-05, "loss": 2.4748, "step": 886000 }, { "epoch": 0.68, "learning_rate": 4.831155939362207e-05, "loss": 2.5276, "step": 886500 }, { "epoch": 0.68, "learning_rate": 4.831060673612683e-05, "loss": 2.4911, "step": 887000 }, { "epoch": 0.68, "learning_rate": 4.830965407863159e-05, "loss": 2.5154, "step": 887500 }, { "epoch": 0.68, "learning_rate": 4.830870142113635e-05, "loss": 2.4843, "step": 888000 }, { "epoch": 0.68, "learning_rate": 4.8307750668956096e-05, "loss": 2.5503, "step": 888500 }, { "epoch": 0.68, "learning_rate": 4.8306798011460855e-05, "loss": 2.4833, "step": 889000 }, { "epoch": 0.68, "learning_rate": 4.83058472592806e-05, "loss": 2.4943, "step": 889500 }, { "epoch": 0.68, "learning_rate": 4.830489460178536e-05, "loss": 2.5046, "step": 890000 }, { "epoch": 0.68, "learning_rate": 4.830394194429011e-05, "loss": 2.5074, "step": 890500 }, { "epoch": 0.68, "learning_rate": 4.830298928679488e-05, "loss": 2.4619, "step": 891000 }, { "epoch": 0.68, "learning_rate": 4.830203662929963e-05, "loss": 2.4918, "step": 891500 }, { "epoch": 0.68, "learning_rate": 4.830108397180439e-05, "loss": 2.491, "step": 892000 }, { "epoch": 0.68, "learning_rate": 4.8300131314309146e-05, "loss": 2.4875, "step": 892500 }, { "epoch": 0.68, "learning_rate": 4.8299178656813904e-05, "loss": 2.4513, "step": 893000 }, { "epoch": 0.68, "learning_rate": 4.829822599931866e-05, "loss": 2.4885, "step": 893500 }, { "epoch": 0.68, "learning_rate": 4.829727334182342e-05, "loss": 2.5036, "step": 894000 }, { "epoch": 0.68, "learning_rate": 4.829632068432818e-05, "loss": 2.5035, "step": 894500 }, { "epoch": 0.68, "learning_rate": 4.829536802683293e-05, "loss": 2.5075, "step": 895000 }, { "epoch": 0.68, "learning_rate": 4.829441727465268e-05, "loss": 2.4598, "step": 895500 }, { "epoch": 0.68, "learning_rate": 4.8293464617157444e-05, "loss": 2.4967, "step": 896000 }, { "epoch": 0.68, "learning_rate": 4.8292511959662196e-05, "loss": 2.5064, "step": 896500 }, { "epoch": 0.68, "learning_rate": 4.8291559302166954e-05, "loss": 2.525, "step": 897000 }, { "epoch": 0.68, "learning_rate": 4.829060664467171e-05, "loss": 2.4545, "step": 897500 }, { "epoch": 0.68, "learning_rate": 4.828965398717647e-05, "loss": 2.516, "step": 898000 }, { "epoch": 0.68, "learning_rate": 4.828870323499622e-05, "loss": 2.4708, "step": 898500 }, { "epoch": 0.69, "learning_rate": 4.828775057750098e-05, "loss": 2.5151, "step": 899000 }, { "epoch": 0.69, "learning_rate": 4.8286797920005735e-05, "loss": 2.4571, "step": 899500 }, { "epoch": 0.69, "learning_rate": 4.8285845262510494e-05, "loss": 2.5175, "step": 900000 }, { "epoch": 0.69, "eval_accuracy": 0.5388566249139872, "eval_loss": 2.4221484661102295, "eval_runtime": 9409.0191, "eval_samples_per_second": 29.226, "eval_steps_per_second": 7.307, "step": 900000 }, { "epoch": 0.69, "learning_rate": 4.8284894510330235e-05, "loss": 2.4905, "step": 900500 }, { "epoch": 0.69, "learning_rate": 4.828394185283499e-05, "loss": 2.4983, "step": 901000 }, { "epoch": 0.69, "learning_rate": 4.828298919533976e-05, "loss": 2.4935, "step": 901500 }, { "epoch": 0.69, "learning_rate": 4.828203653784451e-05, "loss": 2.4928, "step": 902000 }, { "epoch": 0.69, "learning_rate": 4.828108388034927e-05, "loss": 2.4976, "step": 902500 }, { "epoch": 0.69, "learning_rate": 4.8280133128169016e-05, "loss": 2.4794, "step": 903000 }, { "epoch": 0.69, "learning_rate": 4.8279180470673775e-05, "loss": 2.4856, "step": 903500 }, { "epoch": 0.69, "learning_rate": 4.827822781317853e-05, "loss": 2.4948, "step": 904000 }, { "epoch": 0.69, "learning_rate": 4.827727515568329e-05, "loss": 2.511, "step": 904500 }, { "epoch": 0.69, "learning_rate": 4.827632249818805e-05, "loss": 2.4867, "step": 905000 }, { "epoch": 0.69, "learning_rate": 4.827536984069281e-05, "loss": 2.5173, "step": 905500 }, { "epoch": 0.69, "learning_rate": 4.827441718319756e-05, "loss": 2.4996, "step": 906000 }, { "epoch": 0.69, "learning_rate": 4.8273464525702324e-05, "loss": 2.4811, "step": 906500 }, { "epoch": 0.69, "learning_rate": 4.827251377352207e-05, "loss": 2.5069, "step": 907000 }, { "epoch": 0.69, "learning_rate": 4.8271561116026824e-05, "loss": 2.4965, "step": 907500 }, { "epoch": 0.69, "learning_rate": 4.827060845853158e-05, "loss": 2.4813, "step": 908000 }, { "epoch": 0.69, "learning_rate": 4.826965580103634e-05, "loss": 2.4841, "step": 908500 }, { "epoch": 0.69, "learning_rate": 4.826870504885609e-05, "loss": 2.4887, "step": 909000 }, { "epoch": 0.69, "learning_rate": 4.826775239136085e-05, "loss": 2.5073, "step": 909500 }, { "epoch": 0.69, "learning_rate": 4.8266799733865606e-05, "loss": 2.4886, "step": 910000 }, { "epoch": 0.69, "learning_rate": 4.8265847076370364e-05, "loss": 2.5093, "step": 910500 }, { "epoch": 0.69, "learning_rate": 4.8264894418875115e-05, "loss": 2.4743, "step": 911000 }, { "epoch": 0.69, "learning_rate": 4.8263943666694864e-05, "loss": 2.4868, "step": 911500 }, { "epoch": 0.7, "learning_rate": 4.826299100919963e-05, "loss": 2.5044, "step": 912000 }, { "epoch": 0.7, "learning_rate": 4.826203835170438e-05, "loss": 2.4792, "step": 912500 }, { "epoch": 0.7, "learning_rate": 4.826108569420914e-05, "loss": 2.5448, "step": 913000 }, { "epoch": 0.7, "learning_rate": 4.8260133036713904e-05, "loss": 2.5065, "step": 913500 }, { "epoch": 0.7, "learning_rate": 4.8259180379218655e-05, "loss": 2.4822, "step": 914000 }, { "epoch": 0.7, "learning_rate": 4.8258227721723414e-05, "loss": 2.4932, "step": 914500 }, { "epoch": 0.7, "learning_rate": 4.825727506422817e-05, "loss": 2.4814, "step": 915000 }, { "epoch": 0.7, "learning_rate": 4.825632240673293e-05, "loss": 2.4913, "step": 915500 }, { "epoch": 0.7, "learning_rate": 4.825536974923768e-05, "loss": 2.4926, "step": 916000 }, { "epoch": 0.7, "learning_rate": 4.825441709174244e-05, "loss": 2.5007, "step": 916500 }, { "epoch": 0.7, "learning_rate": 4.8253466339562195e-05, "loss": 2.4949, "step": 917000 }, { "epoch": 0.7, "learning_rate": 4.8252513682066946e-05, "loss": 2.4632, "step": 917500 }, { "epoch": 0.7, "learning_rate": 4.8251561024571705e-05, "loss": 2.4737, "step": 918000 }, { "epoch": 0.7, "learning_rate": 4.825060836707646e-05, "loss": 2.5082, "step": 918500 }, { "epoch": 0.7, "learning_rate": 4.824965570958122e-05, "loss": 2.5111, "step": 919000 }, { "epoch": 0.7, "learning_rate": 4.824870305208598e-05, "loss": 2.4833, "step": 919500 }, { "epoch": 0.7, "learning_rate": 4.824775039459074e-05, "loss": 2.4857, "step": 920000 }, { "epoch": 0.7, "learning_rate": 4.8246797737095496e-05, "loss": 2.4953, "step": 920500 }, { "epoch": 0.7, "learning_rate": 4.8245846984915244e-05, "loss": 2.4999, "step": 921000 }, { "epoch": 0.7, "learning_rate": 4.8244894327419996e-05, "loss": 2.4709, "step": 921500 }, { "epoch": 0.7, "learning_rate": 4.824394166992476e-05, "loss": 2.4988, "step": 922000 }, { "epoch": 0.7, "learning_rate": 4.824298901242951e-05, "loss": 2.5042, "step": 922500 }, { "epoch": 0.7, "learning_rate": 4.824203826024926e-05, "loss": 2.508, "step": 923000 }, { "epoch": 0.7, "learning_rate": 4.824108560275402e-05, "loss": 2.5063, "step": 923500 }, { "epoch": 0.7, "learning_rate": 4.824013294525878e-05, "loss": 2.5009, "step": 924000 }, { "epoch": 0.7, "learning_rate": 4.8239180287763536e-05, "loss": 2.4955, "step": 924500 }, { "epoch": 0.7, "learning_rate": 4.8238227630268294e-05, "loss": 2.4691, "step": 925000 }, { "epoch": 0.71, "learning_rate": 4.823727497277305e-05, "loss": 2.5176, "step": 925500 }, { "epoch": 0.71, "learning_rate": 4.823632231527781e-05, "loss": 2.5087, "step": 926000 }, { "epoch": 0.71, "learning_rate": 4.823536965778256e-05, "loss": 2.4921, "step": 926500 }, { "epoch": 0.71, "learning_rate": 4.823441890560231e-05, "loss": 2.5024, "step": 927000 }, { "epoch": 0.71, "learning_rate": 4.8233466248107075e-05, "loss": 2.5081, "step": 927500 }, { "epoch": 0.71, "learning_rate": 4.823251359061183e-05, "loss": 2.4908, "step": 928000 }, { "epoch": 0.71, "learning_rate": 4.8231560933116585e-05, "loss": 2.5146, "step": 928500 }, { "epoch": 0.71, "learning_rate": 4.823060827562135e-05, "loss": 2.4671, "step": 929000 }, { "epoch": 0.71, "learning_rate": 4.82296556181261e-05, "loss": 2.49, "step": 929500 }, { "epoch": 0.71, "learning_rate": 4.822870296063086e-05, "loss": 2.5036, "step": 930000 }, { "epoch": 0.71, "eval_accuracy": 0.539393191471851, "eval_loss": 2.4172141551971436, "eval_runtime": 9413.1379, "eval_samples_per_second": 29.213, "eval_steps_per_second": 7.303, "step": 930000 }, { "epoch": 0.71, "learning_rate": 4.822775030313562e-05, "loss": 2.4828, "step": 930500 }, { "epoch": 0.71, "learning_rate": 4.822679955095537e-05, "loss": 2.5049, "step": 931000 }, { "epoch": 0.71, "learning_rate": 4.8225846893460125e-05, "loss": 2.4641, "step": 931500 }, { "epoch": 0.71, "learning_rate": 4.8224894235964876e-05, "loss": 2.4949, "step": 932000 }, { "epoch": 0.71, "learning_rate": 4.822394348378463e-05, "loss": 2.4619, "step": 932500 }, { "epoch": 0.71, "learning_rate": 4.822299082628939e-05, "loss": 2.4722, "step": 933000 }, { "epoch": 0.71, "learning_rate": 4.822203816879414e-05, "loss": 2.4838, "step": 933500 }, { "epoch": 0.71, "learning_rate": 4.82210855112989e-05, "loss": 2.4456, "step": 934000 }, { "epoch": 0.71, "learning_rate": 4.822013285380366e-05, "loss": 2.4731, "step": 934500 }, { "epoch": 0.71, "learning_rate": 4.8219182101623406e-05, "loss": 2.5111, "step": 935000 }, { "epoch": 0.71, "learning_rate": 4.8218229444128164e-05, "loss": 2.5076, "step": 935500 }, { "epoch": 0.71, "learning_rate": 4.821727678663292e-05, "loss": 2.529, "step": 936000 }, { "epoch": 0.71, "learning_rate": 4.821632603445267e-05, "loss": 2.4612, "step": 936500 }, { "epoch": 0.71, "learning_rate": 4.821537337695743e-05, "loss": 2.4864, "step": 937000 }, { "epoch": 0.71, "learning_rate": 4.821442071946218e-05, "loss": 2.5097, "step": 937500 }, { "epoch": 0.71, "learning_rate": 4.8213468061966946e-05, "loss": 2.4934, "step": 938000 }, { "epoch": 0.72, "learning_rate": 4.8212515404471704e-05, "loss": 2.4869, "step": 938500 }, { "epoch": 0.72, "learning_rate": 4.8211562746976456e-05, "loss": 2.4571, "step": 939000 }, { "epoch": 0.72, "learning_rate": 4.821061008948122e-05, "loss": 2.4786, "step": 939500 }, { "epoch": 0.72, "learning_rate": 4.820965743198597e-05, "loss": 2.4997, "step": 940000 }, { "epoch": 0.72, "learning_rate": 4.820870667980572e-05, "loss": 2.4797, "step": 940500 }, { "epoch": 0.72, "learning_rate": 4.820775402231048e-05, "loss": 2.4914, "step": 941000 }, { "epoch": 0.72, "learning_rate": 4.820680136481524e-05, "loss": 2.4935, "step": 941500 }, { "epoch": 0.72, "learning_rate": 4.8205848707319995e-05, "loss": 2.4896, "step": 942000 }, { "epoch": 0.72, "learning_rate": 4.820489604982475e-05, "loss": 2.4567, "step": 942500 }, { "epoch": 0.72, "learning_rate": 4.8203945297644495e-05, "loss": 2.514, "step": 943000 }, { "epoch": 0.72, "learning_rate": 4.820299264014926e-05, "loss": 2.4889, "step": 943500 }, { "epoch": 0.72, "learning_rate": 4.820203998265401e-05, "loss": 2.4947, "step": 944000 }, { "epoch": 0.72, "learning_rate": 4.820108732515877e-05, "loss": 2.4895, "step": 944500 }, { "epoch": 0.72, "learning_rate": 4.8200134667663535e-05, "loss": 2.4772, "step": 945000 }, { "epoch": 0.72, "learning_rate": 4.819918201016829e-05, "loss": 2.5238, "step": 945500 }, { "epoch": 0.72, "learning_rate": 4.8198229352673045e-05, "loss": 2.4753, "step": 946000 }, { "epoch": 0.72, "learning_rate": 4.81972766951778e-05, "loss": 2.4789, "step": 946500 }, { "epoch": 0.72, "learning_rate": 4.819632594299755e-05, "loss": 2.4676, "step": 947000 }, { "epoch": 0.72, "learning_rate": 4.819537328550231e-05, "loss": 2.4754, "step": 947500 }, { "epoch": 0.72, "learning_rate": 4.819442062800706e-05, "loss": 2.5048, "step": 948000 }, { "epoch": 0.72, "learning_rate": 4.8193467970511826e-05, "loss": 2.471, "step": 948500 }, { "epoch": 0.72, "learning_rate": 4.819251531301658e-05, "loss": 2.4683, "step": 949000 }, { "epoch": 0.72, "learning_rate": 4.8191564560836326e-05, "loss": 2.4646, "step": 949500 }, { "epoch": 0.72, "learning_rate": 4.8190611903341084e-05, "loss": 2.4823, "step": 950000 }, { "epoch": 0.72, "learning_rate": 4.818965924584584e-05, "loss": 2.4908, "step": 950500 }, { "epoch": 0.72, "learning_rate": 4.81887065883506e-05, "loss": 2.4582, "step": 951000 }, { "epoch": 0.73, "learning_rate": 4.818775583617035e-05, "loss": 2.5086, "step": 951500 }, { "epoch": 0.73, "learning_rate": 4.818680317867511e-05, "loss": 2.4951, "step": 952000 }, { "epoch": 0.73, "learning_rate": 4.8185852426494856e-05, "loss": 2.4763, "step": 952500 }, { "epoch": 0.73, "learning_rate": 4.8184899768999614e-05, "loss": 2.4668, "step": 953000 }, { "epoch": 0.73, "learning_rate": 4.8183947111504366e-05, "loss": 2.5116, "step": 953500 }, { "epoch": 0.73, "learning_rate": 4.818299445400913e-05, "loss": 2.4724, "step": 954000 }, { "epoch": 0.73, "learning_rate": 4.818204179651389e-05, "loss": 2.469, "step": 954500 }, { "epoch": 0.73, "learning_rate": 4.818108913901864e-05, "loss": 2.4679, "step": 955000 }, { "epoch": 0.73, "learning_rate": 4.818013838683839e-05, "loss": 2.4678, "step": 955500 }, { "epoch": 0.73, "learning_rate": 4.8179185729343154e-05, "loss": 2.4845, "step": 956000 }, { "epoch": 0.73, "learning_rate": 4.8178233071847905e-05, "loss": 2.4754, "step": 956500 }, { "epoch": 0.73, "learning_rate": 4.8177280414352664e-05, "loss": 2.4891, "step": 957000 }, { "epoch": 0.73, "learning_rate": 4.817632775685742e-05, "loss": 2.4855, "step": 957500 }, { "epoch": 0.73, "learning_rate": 4.817537700467717e-05, "loss": 2.4583, "step": 958000 }, { "epoch": 0.73, "learning_rate": 4.817442434718193e-05, "loss": 2.4874, "step": 958500 }, { "epoch": 0.73, "learning_rate": 4.817347168968668e-05, "loss": 2.5114, "step": 959000 }, { "epoch": 0.73, "learning_rate": 4.8172519032191445e-05, "loss": 2.4895, "step": 959500 }, { "epoch": 0.73, "learning_rate": 4.817156828001119e-05, "loss": 2.4737, "step": 960000 }, { "epoch": 0.73, "eval_accuracy": 0.5402253401093937, "eval_loss": 2.412201166152954, "eval_runtime": 9426.0905, "eval_samples_per_second": 29.173, "eval_steps_per_second": 7.293, "step": 960000 }, { "epoch": 0.73, "learning_rate": 4.8170615622515945e-05, "loss": 2.4677, "step": 960500 }, { "epoch": 0.73, "learning_rate": 4.81696629650207e-05, "loss": 2.4539, "step": 961000 }, { "epoch": 0.73, "learning_rate": 4.816871030752546e-05, "loss": 2.4774, "step": 961500 }, { "epoch": 0.73, "learning_rate": 4.816775765003022e-05, "loss": 2.4727, "step": 962000 }, { "epoch": 0.73, "learning_rate": 4.816680499253498e-05, "loss": 2.499, "step": 962500 }, { "epoch": 0.73, "learning_rate": 4.8165852335039736e-05, "loss": 2.4723, "step": 963000 }, { "epoch": 0.73, "learning_rate": 4.8164899677544495e-05, "loss": 2.4551, "step": 963500 }, { "epoch": 0.73, "learning_rate": 4.8163947020049246e-05, "loss": 2.4807, "step": 964000 }, { "epoch": 0.74, "learning_rate": 4.8162996267869e-05, "loss": 2.5056, "step": 964500 }, { "epoch": 0.74, "learning_rate": 4.816204361037376e-05, "loss": 2.5135, "step": 965000 }, { "epoch": 0.74, "learning_rate": 4.816109285819351e-05, "loss": 2.4659, "step": 965500 }, { "epoch": 0.74, "learning_rate": 4.816014210601325e-05, "loss": 2.4747, "step": 966000 }, { "epoch": 0.74, "learning_rate": 4.815918944851801e-05, "loss": 2.4545, "step": 966500 }, { "epoch": 0.74, "learning_rate": 4.815823679102277e-05, "loss": 2.5005, "step": 967000 }, { "epoch": 0.74, "learning_rate": 4.8157284133527524e-05, "loss": 2.4939, "step": 967500 }, { "epoch": 0.74, "learning_rate": 4.815633147603228e-05, "loss": 2.4551, "step": 968000 }, { "epoch": 0.74, "learning_rate": 4.815537881853704e-05, "loss": 2.4803, "step": 968500 }, { "epoch": 0.74, "learning_rate": 4.81544261610418e-05, "loss": 2.5045, "step": 969000 }, { "epoch": 0.74, "learning_rate": 4.815347350354655e-05, "loss": 2.5134, "step": 969500 }, { "epoch": 0.74, "learning_rate": 4.8152520846051316e-05, "loss": 2.4397, "step": 970000 }, { "epoch": 0.74, "learning_rate": 4.8151570093871064e-05, "loss": 2.476, "step": 970500 }, { "epoch": 0.74, "learning_rate": 4.8150617436375815e-05, "loss": 2.4476, "step": 971000 }, { "epoch": 0.74, "learning_rate": 4.8149664778880574e-05, "loss": 2.4668, "step": 971500 }, { "epoch": 0.74, "learning_rate": 4.814871402670033e-05, "loss": 2.499, "step": 972000 }, { "epoch": 0.74, "learning_rate": 4.814776136920508e-05, "loss": 2.4625, "step": 972500 }, { "epoch": 0.74, "learning_rate": 4.814680871170984e-05, "loss": 2.489, "step": 973000 }, { "epoch": 0.74, "learning_rate": 4.81458560542146e-05, "loss": 2.4892, "step": 973500 }, { "epoch": 0.74, "learning_rate": 4.8144903396719355e-05, "loss": 2.5068, "step": 974000 }, { "epoch": 0.74, "learning_rate": 4.814395073922411e-05, "loss": 2.4924, "step": 974500 }, { "epoch": 0.74, "learning_rate": 4.814299808172887e-05, "loss": 2.4525, "step": 975000 }, { "epoch": 0.74, "learning_rate": 4.814204542423363e-05, "loss": 2.4971, "step": 975500 }, { "epoch": 0.74, "learning_rate": 4.814109467205338e-05, "loss": 2.4405, "step": 976000 }, { "epoch": 0.74, "learning_rate": 4.814014201455813e-05, "loss": 2.4834, "step": 976500 }, { "epoch": 0.74, "learning_rate": 4.8139189357062895e-05, "loss": 2.4788, "step": 977000 }, { "epoch": 0.74, "learning_rate": 4.8138236699567646e-05, "loss": 2.4718, "step": 977500 }, { "epoch": 0.75, "learning_rate": 4.8137284042072405e-05, "loss": 2.499, "step": 978000 }, { "epoch": 0.75, "learning_rate": 4.813633328989215e-05, "loss": 2.4602, "step": 978500 }, { "epoch": 0.75, "learning_rate": 4.813538063239691e-05, "loss": 2.4698, "step": 979000 }, { "epoch": 0.75, "learning_rate": 4.813442797490167e-05, "loss": 2.4682, "step": 979500 }, { "epoch": 0.75, "learning_rate": 4.813347531740643e-05, "loss": 2.492, "step": 980000 }, { "epoch": 0.75, "learning_rate": 4.8132522659911186e-05, "loss": 2.4744, "step": 980500 }, { "epoch": 0.75, "learning_rate": 4.8131571907730934e-05, "loss": 2.5006, "step": 981000 }, { "epoch": 0.75, "learning_rate": 4.813061925023569e-05, "loss": 2.5044, "step": 981500 }, { "epoch": 0.75, "learning_rate": 4.8129666592740444e-05, "loss": 2.4985, "step": 982000 }, { "epoch": 0.75, "learning_rate": 4.812871393524521e-05, "loss": 2.4751, "step": 982500 }, { "epoch": 0.75, "learning_rate": 4.812776127774996e-05, "loss": 2.4424, "step": 983000 }, { "epoch": 0.75, "learning_rate": 4.812681052556971e-05, "loss": 2.4881, "step": 983500 }, { "epoch": 0.75, "learning_rate": 4.812585786807447e-05, "loss": 2.4833, "step": 984000 }, { "epoch": 0.75, "learning_rate": 4.8124907115894216e-05, "loss": 2.483, "step": 984500 }, { "epoch": 0.75, "learning_rate": 4.8123954458398974e-05, "loss": 2.4597, "step": 985000 }, { "epoch": 0.75, "learning_rate": 4.812300180090373e-05, "loss": 2.4662, "step": 985500 }, { "epoch": 0.75, "learning_rate": 4.812204914340849e-05, "loss": 2.4616, "step": 986000 }, { "epoch": 0.75, "learning_rate": 4.812109648591325e-05, "loss": 2.5064, "step": 986500 }, { "epoch": 0.75, "learning_rate": 4.8120143828418e-05, "loss": 2.4773, "step": 987000 }, { "epoch": 0.75, "learning_rate": 4.811919117092276e-05, "loss": 2.4767, "step": 987500 }, { "epoch": 0.75, "learning_rate": 4.8118238513427524e-05, "loss": 2.4788, "step": 988000 }, { "epoch": 0.75, "learning_rate": 4.8117285855932275e-05, "loss": 2.4874, "step": 988500 }, { "epoch": 0.75, "learning_rate": 4.811633319843703e-05, "loss": 2.4651, "step": 989000 }, { "epoch": 0.75, "learning_rate": 4.811538054094179e-05, "loss": 2.4646, "step": 989500 }, { "epoch": 0.75, "learning_rate": 4.811442978876154e-05, "loss": 2.4655, "step": 990000 }, { "epoch": 0.75, "eval_accuracy": 0.5409409316302379, "eval_loss": 2.4072861671447754, "eval_runtime": 9433.0912, "eval_samples_per_second": 29.151, "eval_steps_per_second": 7.288, "step": 990000 }, { "epoch": 0.75, "learning_rate": 4.81134771312663e-05, "loss": 2.4483, "step": 990500 }, { "epoch": 0.76, "learning_rate": 4.8112524473771057e-05, "loss": 2.4964, "step": 991000 }, { "epoch": 0.76, "learning_rate": 4.8111571816275815e-05, "loss": 2.4624, "step": 991500 }, { "epoch": 0.76, "learning_rate": 4.8110619158780566e-05, "loss": 2.4773, "step": 992000 }, { "epoch": 0.76, "learning_rate": 4.8109666501285325e-05, "loss": 2.4647, "step": 992500 }, { "epoch": 0.76, "learning_rate": 4.810871384379009e-05, "loss": 2.4689, "step": 993000 }, { "epoch": 0.76, "learning_rate": 4.810776118629484e-05, "loss": 2.4664, "step": 993500 }, { "epoch": 0.76, "learning_rate": 4.81068085287996e-05, "loss": 2.4742, "step": 994000 }, { "epoch": 0.76, "learning_rate": 4.810585968193434e-05, "loss": 2.4475, "step": 994500 }, { "epoch": 0.76, "learning_rate": 4.8104907024439096e-05, "loss": 2.4938, "step": 995000 }, { "epoch": 0.76, "learning_rate": 4.8103954366943854e-05, "loss": 2.5089, "step": 995500 }, { "epoch": 0.76, "learning_rate": 4.81030036147636e-05, "loss": 2.4848, "step": 996000 }, { "epoch": 0.76, "learning_rate": 4.810205095726836e-05, "loss": 2.4562, "step": 996500 }, { "epoch": 0.76, "learning_rate": 4.810109829977312e-05, "loss": 2.4675, "step": 997000 }, { "epoch": 0.76, "learning_rate": 4.810014564227788e-05, "loss": 2.4573, "step": 997500 }, { "epoch": 0.76, "learning_rate": 4.809919298478263e-05, "loss": 2.4664, "step": 998000 }, { "epoch": 0.76, "learning_rate": 4.8098240327287394e-05, "loss": 2.4773, "step": 998500 }, { "epoch": 0.76, "learning_rate": 4.8097287669792146e-05, "loss": 2.4876, "step": 999000 }, { "epoch": 0.76, "learning_rate": 4.8096335012296904e-05, "loss": 2.4869, "step": 999500 }, { "epoch": 0.76, "learning_rate": 4.809538235480166e-05, "loss": 2.443, "step": 1000000 }, { "epoch": 0.76, "learning_rate": 4.809442969730642e-05, "loss": 2.4626, "step": 1000500 }, { "epoch": 0.76, "learning_rate": 4.809347703981118e-05, "loss": 2.4709, "step": 1001000 }, { "epoch": 0.76, "learning_rate": 4.809252438231594e-05, "loss": 2.4591, "step": 1001500 }, { "epoch": 0.76, "learning_rate": 4.8091573630135685e-05, "loss": 2.5046, "step": 1002000 }, { "epoch": 0.76, "learning_rate": 4.8090620972640444e-05, "loss": 2.5069, "step": 1002500 }, { "epoch": 0.76, "learning_rate": 4.8089668315145195e-05, "loss": 2.4912, "step": 1003000 }, { "epoch": 0.76, "learning_rate": 4.808871756296494e-05, "loss": 2.471, "step": 1003500 }, { "epoch": 0.77, "learning_rate": 4.808776490546971e-05, "loss": 2.4829, "step": 1004000 }, { "epoch": 0.77, "learning_rate": 4.808681224797446e-05, "loss": 2.4365, "step": 1004500 }, { "epoch": 0.77, "learning_rate": 4.808585959047922e-05, "loss": 2.4508, "step": 1005000 }, { "epoch": 0.77, "learning_rate": 4.8084906932983976e-05, "loss": 2.4546, "step": 1005500 }, { "epoch": 0.77, "learning_rate": 4.8083956180803725e-05, "loss": 2.4639, "step": 1006000 }, { "epoch": 0.77, "learning_rate": 4.808300352330848e-05, "loss": 2.478, "step": 1006500 }, { "epoch": 0.77, "learning_rate": 4.808205086581324e-05, "loss": 2.4844, "step": 1007000 }, { "epoch": 0.77, "learning_rate": 4.8081098208318e-05, "loss": 2.499, "step": 1007500 }, { "epoch": 0.77, "learning_rate": 4.808014555082275e-05, "loss": 2.4942, "step": 1008000 }, { "epoch": 0.77, "learning_rate": 4.80791947986425e-05, "loss": 2.4731, "step": 1008500 }, { "epoch": 0.77, "learning_rate": 4.8078242141147264e-05, "loss": 2.4879, "step": 1009000 }, { "epoch": 0.77, "learning_rate": 4.8077289483652016e-05, "loss": 2.4472, "step": 1009500 }, { "epoch": 0.77, "learning_rate": 4.8076336826156774e-05, "loss": 2.4394, "step": 1010000 }, { "epoch": 0.77, "learning_rate": 4.807538607397652e-05, "loss": 2.4794, "step": 1010500 }, { "epoch": 0.77, "learning_rate": 4.807443341648128e-05, "loss": 2.4854, "step": 1011000 }, { "epoch": 0.77, "learning_rate": 4.807348075898604e-05, "loss": 2.4698, "step": 1011500 }, { "epoch": 0.77, "learning_rate": 4.80725281014908e-05, "loss": 2.4573, "step": 1012000 }, { "epoch": 0.77, "learning_rate": 4.8071575443995556e-05, "loss": 2.5029, "step": 1012500 }, { "epoch": 0.77, "learning_rate": 4.8070622786500314e-05, "loss": 2.4642, "step": 1013000 }, { "epoch": 0.77, "learning_rate": 4.8069670129005066e-05, "loss": 2.4686, "step": 1013500 }, { "epoch": 0.77, "learning_rate": 4.8068719376824814e-05, "loss": 2.4545, "step": 1014000 }, { "epoch": 0.77, "learning_rate": 4.806776671932958e-05, "loss": 2.458, "step": 1014500 }, { "epoch": 0.77, "learning_rate": 4.806681596714933e-05, "loss": 2.4629, "step": 1015000 }, { "epoch": 0.77, "learning_rate": 4.806586521496907e-05, "loss": 2.4895, "step": 1015500 }, { "epoch": 0.77, "learning_rate": 4.806491255747383e-05, "loss": 2.4846, "step": 1016000 }, { "epoch": 0.77, "learning_rate": 4.806395989997859e-05, "loss": 2.486, "step": 1016500 }, { "epoch": 0.78, "learning_rate": 4.8063007242483344e-05, "loss": 2.5048, "step": 1017000 }, { "epoch": 0.78, "learning_rate": 4.80620545849881e-05, "loss": 2.4903, "step": 1017500 }, { "epoch": 0.78, "learning_rate": 4.806110192749286e-05, "loss": 2.4722, "step": 1018000 }, { "epoch": 0.78, "learning_rate": 4.806014926999762e-05, "loss": 2.4848, "step": 1018500 }, { "epoch": 0.78, "learning_rate": 4.805919661250237e-05, "loss": 2.4748, "step": 1019000 }, { "epoch": 0.78, "learning_rate": 4.805824586032212e-05, "loss": 2.4389, "step": 1019500 }, { "epoch": 0.78, "learning_rate": 4.805729320282688e-05, "loss": 2.4693, "step": 1020000 }, { "epoch": 0.78, "eval_accuracy": 0.5417482253664431, "eval_loss": 2.403618812561035, "eval_runtime": 9433.7308, "eval_samples_per_second": 29.149, "eval_steps_per_second": 7.287, "step": 1020000 }, { "epoch": 0.78, "learning_rate": 4.8056340545331635e-05, "loss": 2.4557, "step": 1020500 }, { "epoch": 0.78, "learning_rate": 4.805538788783639e-05, "loss": 2.4343, "step": 1021000 }, { "epoch": 0.78, "learning_rate": 4.805443523034116e-05, "loss": 2.4373, "step": 1021500 }, { "epoch": 0.78, "learning_rate": 4.805348257284591e-05, "loss": 2.4647, "step": 1022000 }, { "epoch": 0.78, "learning_rate": 4.805252991535067e-05, "loss": 2.4548, "step": 1022500 }, { "epoch": 0.78, "learning_rate": 4.8051577257855426e-05, "loss": 2.446, "step": 1023000 }, { "epoch": 0.78, "learning_rate": 4.8050624600360184e-05, "loss": 2.5157, "step": 1023500 }, { "epoch": 0.78, "learning_rate": 4.804967194286494e-05, "loss": 2.4779, "step": 1024000 }, { "epoch": 0.78, "learning_rate": 4.80487192853697e-05, "loss": 2.4501, "step": 1024500 }, { "epoch": 0.78, "learning_rate": 4.804776662787446e-05, "loss": 2.4923, "step": 1025000 }, { "epoch": 0.78, "learning_rate": 4.804681397037921e-05, "loss": 2.4818, "step": 1025500 }, { "epoch": 0.78, "learning_rate": 4.804586131288397e-05, "loss": 2.4651, "step": 1026000 }, { "epoch": 0.78, "learning_rate": 4.804490865538873e-05, "loss": 2.4953, "step": 1026500 }, { "epoch": 0.78, "learning_rate": 4.8043955997893486e-05, "loss": 2.453, "step": 1027000 }, { "epoch": 0.78, "learning_rate": 4.8043003340398244e-05, "loss": 2.4559, "step": 1027500 }, { "epoch": 0.78, "learning_rate": 4.804205258821799e-05, "loss": 2.4869, "step": 1028000 }, { "epoch": 0.78, "learning_rate": 4.804109993072275e-05, "loss": 2.4966, "step": 1028500 }, { "epoch": 0.78, "learning_rate": 4.804014727322751e-05, "loss": 2.4683, "step": 1029000 }, { "epoch": 0.78, "learning_rate": 4.803919652104725e-05, "loss": 2.4765, "step": 1029500 }, { "epoch": 0.78, "learning_rate": 4.8038243863552015e-05, "loss": 2.5122, "step": 1030000 }, { "epoch": 0.79, "learning_rate": 4.8037291206056774e-05, "loss": 2.4904, "step": 1030500 }, { "epoch": 0.79, "learning_rate": 4.8036338548561525e-05, "loss": 2.5092, "step": 1031000 }, { "epoch": 0.79, "learning_rate": 4.8035385891066284e-05, "loss": 2.4733, "step": 1031500 }, { "epoch": 0.79, "learning_rate": 4.803443323357104e-05, "loss": 2.4707, "step": 1032000 }, { "epoch": 0.79, "learning_rate": 4.803348248139079e-05, "loss": 2.4936, "step": 1032500 }, { "epoch": 0.79, "learning_rate": 4.803252982389555e-05, "loss": 2.4895, "step": 1033000 }, { "epoch": 0.79, "learning_rate": 4.803157716640031e-05, "loss": 2.4742, "step": 1033500 }, { "epoch": 0.79, "learning_rate": 4.8030624508905065e-05, "loss": 2.4679, "step": 1034000 }, { "epoch": 0.79, "learning_rate": 4.8029671851409816e-05, "loss": 2.4937, "step": 1034500 }, { "epoch": 0.79, "learning_rate": 4.8028721099229565e-05, "loss": 2.4666, "step": 1035000 }, { "epoch": 0.79, "learning_rate": 4.802776844173433e-05, "loss": 2.4896, "step": 1035500 }, { "epoch": 0.79, "learning_rate": 4.802681578423908e-05, "loss": 2.4912, "step": 1036000 }, { "epoch": 0.79, "learning_rate": 4.802586312674384e-05, "loss": 2.4853, "step": 1036500 }, { "epoch": 0.79, "learning_rate": 4.8024910469248605e-05, "loss": 2.4839, "step": 1037000 }, { "epoch": 0.79, "learning_rate": 4.8023959717068346e-05, "loss": 2.4711, "step": 1037500 }, { "epoch": 0.79, "learning_rate": 4.8023007059573104e-05, "loss": 2.4715, "step": 1038000 }, { "epoch": 0.79, "learning_rate": 4.802205440207786e-05, "loss": 2.4911, "step": 1038500 }, { "epoch": 0.79, "learning_rate": 4.802110174458262e-05, "loss": 2.4726, "step": 1039000 }, { "epoch": 0.79, "learning_rate": 4.802014908708738e-05, "loss": 2.4622, "step": 1039500 }, { "epoch": 0.79, "learning_rate": 4.801919642959213e-05, "loss": 2.4334, "step": 1040000 }, { "epoch": 0.79, "learning_rate": 4.8018245677411886e-05, "loss": 2.4867, "step": 1040500 }, { "epoch": 0.79, "learning_rate": 4.8017293019916644e-05, "loss": 2.4656, "step": 1041000 }, { "epoch": 0.79, "learning_rate": 4.8016340362421396e-05, "loss": 2.4652, "step": 1041500 }, { "epoch": 0.79, "learning_rate": 4.8015387704926154e-05, "loss": 2.4633, "step": 1042000 }, { "epoch": 0.79, "learning_rate": 4.801443504743091e-05, "loss": 2.4289, "step": 1042500 }, { "epoch": 0.79, "learning_rate": 4.801348429525066e-05, "loss": 2.5151, "step": 1043000 }, { "epoch": 0.8, "learning_rate": 4.801253163775542e-05, "loss": 2.478, "step": 1043500 }, { "epoch": 0.8, "learning_rate": 4.801157898026018e-05, "loss": 2.4469, "step": 1044000 }, { "epoch": 0.8, "learning_rate": 4.8010628228079925e-05, "loss": 2.4742, "step": 1044500 }, { "epoch": 0.8, "learning_rate": 4.8009675570584684e-05, "loss": 2.4527, "step": 1045000 }, { "epoch": 0.8, "learning_rate": 4.8008722913089435e-05, "loss": 2.4618, "step": 1045500 }, { "epoch": 0.8, "learning_rate": 4.80077702555942e-05, "loss": 2.4466, "step": 1046000 }, { "epoch": 0.8, "learning_rate": 4.800681759809896e-05, "loss": 2.4846, "step": 1046500 }, { "epoch": 0.8, "learning_rate": 4.800586494060371e-05, "loss": 2.4428, "step": 1047000 }, { "epoch": 0.8, "learning_rate": 4.8004912283108475e-05, "loss": 2.4663, "step": 1047500 }, { "epoch": 0.8, "learning_rate": 4.800395962561323e-05, "loss": 2.4806, "step": 1048000 }, { "epoch": 0.8, "learning_rate": 4.8003006968117985e-05, "loss": 2.4414, "step": 1048500 }, { "epoch": 0.8, "learning_rate": 4.800205621593773e-05, "loss": 2.4551, "step": 1049000 }, { "epoch": 0.8, "learning_rate": 4.800110355844249e-05, "loss": 2.459, "step": 1049500 }, { "epoch": 0.8, "learning_rate": 4.800015090094725e-05, "loss": 2.4836, "step": 1050000 }, { "epoch": 0.8, "eval_accuracy": 0.5422612451756714, "eval_loss": 2.3978164196014404, "eval_runtime": 9409.8053, "eval_samples_per_second": 29.224, "eval_steps_per_second": 7.306, "step": 1050000 }, { "epoch": 0.8, "learning_rate": 4.7999198243452e-05, "loss": 2.4703, "step": 1050500 }, { "epoch": 0.8, "learning_rate": 4.799824749127175e-05, "loss": 2.4687, "step": 1051000 }, { "epoch": 0.8, "learning_rate": 4.7997294833776515e-05, "loss": 2.4701, "step": 1051500 }, { "epoch": 0.8, "learning_rate": 4.7996342176281266e-05, "loss": 2.4913, "step": 1052000 }, { "epoch": 0.8, "learning_rate": 4.7995389518786024e-05, "loss": 2.4866, "step": 1052500 }, { "epoch": 0.8, "learning_rate": 4.799443686129079e-05, "loss": 2.4522, "step": 1053000 }, { "epoch": 0.8, "learning_rate": 4.799348420379554e-05, "loss": 2.458, "step": 1053500 }, { "epoch": 0.8, "learning_rate": 4.799253345161529e-05, "loss": 2.4775, "step": 1054000 }, { "epoch": 0.8, "learning_rate": 4.799158079412005e-05, "loss": 2.4731, "step": 1054500 }, { "epoch": 0.8, "learning_rate": 4.7990628136624806e-05, "loss": 2.4765, "step": 1055000 }, { "epoch": 0.8, "learning_rate": 4.7989675479129564e-05, "loss": 2.4676, "step": 1055500 }, { "epoch": 0.8, "learning_rate": 4.7988722821634316e-05, "loss": 2.4659, "step": 1056000 }, { "epoch": 0.81, "learning_rate": 4.798777016413908e-05, "loss": 2.4868, "step": 1056500 }, { "epoch": 0.81, "learning_rate": 4.798681750664383e-05, "loss": 2.4644, "step": 1057000 }, { "epoch": 0.81, "learning_rate": 4.798586484914859e-05, "loss": 2.4655, "step": 1057500 }, { "epoch": 0.81, "learning_rate": 4.798491409696834e-05, "loss": 2.4758, "step": 1058000 }, { "epoch": 0.81, "learning_rate": 4.798396525010308e-05, "loss": 2.4683, "step": 1058500 }, { "epoch": 0.81, "learning_rate": 4.798301259260784e-05, "loss": 2.4612, "step": 1059000 }, { "epoch": 0.81, "learning_rate": 4.7982059935112594e-05, "loss": 2.4633, "step": 1059500 }, { "epoch": 0.81, "learning_rate": 4.798110727761735e-05, "loss": 2.4523, "step": 1060000 }, { "epoch": 0.81, "learning_rate": 4.798015462012211e-05, "loss": 2.4712, "step": 1060500 }, { "epoch": 0.81, "learning_rate": 4.797920196262687e-05, "loss": 2.5003, "step": 1061000 }, { "epoch": 0.81, "learning_rate": 4.797824930513162e-05, "loss": 2.4579, "step": 1061500 }, { "epoch": 0.81, "learning_rate": 4.7977296647636385e-05, "loss": 2.4768, "step": 1062000 }, { "epoch": 0.81, "learning_rate": 4.7976343990141143e-05, "loss": 2.4944, "step": 1062500 }, { "epoch": 0.81, "learning_rate": 4.7975391332645895e-05, "loss": 2.4808, "step": 1063000 }, { "epoch": 0.81, "learning_rate": 4.797444058046564e-05, "loss": 2.4921, "step": 1063500 }, { "epoch": 0.81, "learning_rate": 4.79734898282854e-05, "loss": 2.4625, "step": 1064000 }, { "epoch": 0.81, "learning_rate": 4.797253717079015e-05, "loss": 2.4526, "step": 1064500 }, { "epoch": 0.81, "learning_rate": 4.797158451329491e-05, "loss": 2.4814, "step": 1065000 }, { "epoch": 0.81, "learning_rate": 4.7970631855799666e-05, "loss": 2.4245, "step": 1065500 }, { "epoch": 0.81, "learning_rate": 4.7969679198304425e-05, "loss": 2.4921, "step": 1066000 }, { "epoch": 0.81, "learning_rate": 4.796872654080918e-05, "loss": 2.4531, "step": 1066500 }, { "epoch": 0.81, "learning_rate": 4.796777388331394e-05, "loss": 2.4505, "step": 1067000 }, { "epoch": 0.81, "learning_rate": 4.796682313113369e-05, "loss": 2.4607, "step": 1067500 }, { "epoch": 0.81, "learning_rate": 4.796587047363845e-05, "loss": 2.4782, "step": 1068000 }, { "epoch": 0.81, "learning_rate": 4.79649178161432e-05, "loss": 2.4652, "step": 1068500 }, { "epoch": 0.81, "learning_rate": 4.7963965158647964e-05, "loss": 2.4647, "step": 1069000 }, { "epoch": 0.82, "learning_rate": 4.7963012501152716e-05, "loss": 2.4741, "step": 1069500 }, { "epoch": 0.82, "learning_rate": 4.7962059843657474e-05, "loss": 2.4717, "step": 1070000 }, { "epoch": 0.82, "learning_rate": 4.796110718616223e-05, "loss": 2.4616, "step": 1070500 }, { "epoch": 0.82, "learning_rate": 4.796015452866699e-05, "loss": 2.4862, "step": 1071000 }, { "epoch": 0.82, "learning_rate": 4.795920377648674e-05, "loss": 2.4684, "step": 1071500 }, { "epoch": 0.82, "learning_rate": 4.79582511189915e-05, "loss": 2.4619, "step": 1072000 }, { "epoch": 0.82, "learning_rate": 4.7957298461496256e-05, "loss": 2.4633, "step": 1072500 }, { "epoch": 0.82, "learning_rate": 4.7956345804001014e-05, "loss": 2.4753, "step": 1073000 }, { "epoch": 0.82, "learning_rate": 4.7955393146505765e-05, "loss": 2.4488, "step": 1073500 }, { "epoch": 0.82, "learning_rate": 4.7954440489010524e-05, "loss": 2.4815, "step": 1074000 }, { "epoch": 0.82, "learning_rate": 4.795348783151528e-05, "loss": 2.4365, "step": 1074500 }, { "epoch": 0.82, "learning_rate": 4.795253707933503e-05, "loss": 2.4861, "step": 1075000 }, { "epoch": 0.82, "learning_rate": 4.795158442183979e-05, "loss": 2.4586, "step": 1075500 }, { "epoch": 0.82, "learning_rate": 4.795063176434455e-05, "loss": 2.4764, "step": 1076000 }, { "epoch": 0.82, "learning_rate": 4.7949679106849305e-05, "loss": 2.4341, "step": 1076500 }, { "epoch": 0.82, "learning_rate": 4.7948726449354063e-05, "loss": 2.4677, "step": 1077000 }, { "epoch": 0.82, "learning_rate": 4.7947775697173805e-05, "loss": 2.4554, "step": 1077500 }, { "epoch": 0.82, "learning_rate": 4.794682303967857e-05, "loss": 2.4678, "step": 1078000 }, { "epoch": 0.82, "learning_rate": 4.794587038218333e-05, "loss": 2.4601, "step": 1078500 }, { "epoch": 0.82, "learning_rate": 4.794491772468808e-05, "loss": 2.4675, "step": 1079000 }, { "epoch": 0.82, "learning_rate": 4.7943965067192845e-05, "loss": 2.4539, "step": 1079500 }, { "epoch": 0.82, "learning_rate": 4.7943012409697596e-05, "loss": 2.4806, "step": 1080000 }, { "epoch": 0.82, "eval_accuracy": 0.5427739343919616, "eval_loss": 2.394293785095215, "eval_runtime": 9414.9166, "eval_samples_per_second": 29.208, "eval_steps_per_second": 7.302, "step": 1080000 }, { "epoch": 0.82, "learning_rate": 4.7942059752202355e-05, "loss": 2.4776, "step": 1080500 }, { "epoch": 0.82, "learning_rate": 4.794110709470711e-05, "loss": 2.4622, "step": 1081000 }, { "epoch": 0.82, "learning_rate": 4.794015443721187e-05, "loss": 2.4604, "step": 1081500 }, { "epoch": 0.82, "learning_rate": 4.793920177971663e-05, "loss": 2.4715, "step": 1082000 }, { "epoch": 0.83, "learning_rate": 4.793825102753638e-05, "loss": 2.4878, "step": 1082500 }, { "epoch": 0.83, "learning_rate": 4.7937298370041136e-05, "loss": 2.4794, "step": 1083000 }, { "epoch": 0.83, "learning_rate": 4.7936345712545894e-05, "loss": 2.4649, "step": 1083500 }, { "epoch": 0.83, "learning_rate": 4.7935393055050646e-05, "loss": 2.4654, "step": 1084000 }, { "epoch": 0.83, "learning_rate": 4.793444039755541e-05, "loss": 2.4496, "step": 1084500 }, { "epoch": 0.83, "learning_rate": 4.793348964537516e-05, "loss": 2.4605, "step": 1085000 }, { "epoch": 0.83, "learning_rate": 4.793253698787991e-05, "loss": 2.4658, "step": 1085500 }, { "epoch": 0.83, "learning_rate": 4.793158433038467e-05, "loss": 2.464, "step": 1086000 }, { "epoch": 0.83, "learning_rate": 4.793063167288943e-05, "loss": 2.4869, "step": 1086500 }, { "epoch": 0.83, "learning_rate": 4.7929680920709176e-05, "loss": 2.4393, "step": 1087000 }, { "epoch": 0.83, "learning_rate": 4.7928728263213934e-05, "loss": 2.4718, "step": 1087500 }, { "epoch": 0.83, "learning_rate": 4.792777560571869e-05, "loss": 2.4957, "step": 1088000 }, { "epoch": 0.83, "learning_rate": 4.792682294822345e-05, "loss": 2.4557, "step": 1088500 }, { "epoch": 0.83, "learning_rate": 4.792587029072821e-05, "loss": 2.4624, "step": 1089000 }, { "epoch": 0.83, "learning_rate": 4.792491953854795e-05, "loss": 2.4763, "step": 1089500 }, { "epoch": 0.83, "learning_rate": 4.7923966881052715e-05, "loss": 2.5094, "step": 1090000 }, { "epoch": 0.83, "learning_rate": 4.7923014223557474e-05, "loss": 2.4537, "step": 1090500 }, { "epoch": 0.83, "learning_rate": 4.7922061566062225e-05, "loss": 2.4459, "step": 1091000 }, { "epoch": 0.83, "learning_rate": 4.792110890856698e-05, "loss": 2.4685, "step": 1091500 }, { "epoch": 0.83, "learning_rate": 4.792015625107174e-05, "loss": 2.4108, "step": 1092000 }, { "epoch": 0.83, "learning_rate": 4.79192035935765e-05, "loss": 2.4233, "step": 1092500 }, { "epoch": 0.83, "learning_rate": 4.791825093608125e-05, "loss": 2.422, "step": 1093000 }, { "epoch": 0.83, "learning_rate": 4.7917300183901007e-05, "loss": 2.4647, "step": 1093500 }, { "epoch": 0.83, "learning_rate": 4.7916347526405765e-05, "loss": 2.4625, "step": 1094000 }, { "epoch": 0.83, "learning_rate": 4.7915394868910516e-05, "loss": 2.4868, "step": 1094500 }, { "epoch": 0.83, "learning_rate": 4.7914442211415275e-05, "loss": 2.431, "step": 1095000 }, { "epoch": 0.83, "learning_rate": 4.791349145923503e-05, "loss": 2.5103, "step": 1095500 }, { "epoch": 0.84, "learning_rate": 4.791254070705478e-05, "loss": 2.4664, "step": 1096000 }, { "epoch": 0.84, "learning_rate": 4.791158804955953e-05, "loss": 2.484, "step": 1096500 }, { "epoch": 0.84, "learning_rate": 4.791063539206429e-05, "loss": 2.4987, "step": 1097000 }, { "epoch": 0.84, "learning_rate": 4.7909682734569046e-05, "loss": 2.4712, "step": 1097500 }, { "epoch": 0.84, "learning_rate": 4.7908730077073804e-05, "loss": 2.461, "step": 1098000 }, { "epoch": 0.84, "learning_rate": 4.790777932489355e-05, "loss": 2.449, "step": 1098500 }, { "epoch": 0.84, "learning_rate": 4.790682666739831e-05, "loss": 2.445, "step": 1099000 }, { "epoch": 0.84, "learning_rate": 4.790587400990307e-05, "loss": 2.4625, "step": 1099500 }, { "epoch": 0.84, "learning_rate": 4.790492135240783e-05, "loss": 2.4803, "step": 1100000 }, { "epoch": 0.84, "learning_rate": 4.790396869491258e-05, "loss": 2.4724, "step": 1100500 }, { "epoch": 0.84, "learning_rate": 4.7903017942732334e-05, "loss": 2.4759, "step": 1101000 }, { "epoch": 0.84, "learning_rate": 4.790206528523709e-05, "loss": 2.4644, "step": 1101500 }, { "epoch": 0.84, "learning_rate": 4.7901112627741844e-05, "loss": 2.4567, "step": 1102000 }, { "epoch": 0.84, "learning_rate": 4.79001599702466e-05, "loss": 2.43, "step": 1102500 }, { "epoch": 0.84, "learning_rate": 4.789920731275136e-05, "loss": 2.4679, "step": 1103000 }, { "epoch": 0.84, "learning_rate": 4.789825465525612e-05, "loss": 2.4397, "step": 1103500 }, { "epoch": 0.84, "learning_rate": 4.789730199776088e-05, "loss": 2.458, "step": 1104000 }, { "epoch": 0.84, "learning_rate": 4.7896349340265635e-05, "loss": 2.4941, "step": 1104500 }, { "epoch": 0.84, "learning_rate": 4.7895396682770394e-05, "loss": 2.4389, "step": 1105000 }, { "epoch": 0.84, "learning_rate": 4.7894445930590135e-05, "loss": 2.469, "step": 1105500 }, { "epoch": 0.84, "learning_rate": 4.78934932730949e-05, "loss": 2.4282, "step": 1106000 }, { "epoch": 0.84, "learning_rate": 4.789254061559966e-05, "loss": 2.4757, "step": 1106500 }, { "epoch": 0.84, "learning_rate": 4.789158795810441e-05, "loss": 2.4651, "step": 1107000 }, { "epoch": 0.84, "learning_rate": 4.789063720592416e-05, "loss": 2.4845, "step": 1107500 }, { "epoch": 0.84, "learning_rate": 4.788968454842892e-05, "loss": 2.4633, "step": 1108000 }, { "epoch": 0.84, "learning_rate": 4.7888731890933675e-05, "loss": 2.4726, "step": 1108500 }, { "epoch": 0.85, "learning_rate": 4.788777923343843e-05, "loss": 2.4672, "step": 1109000 }, { "epoch": 0.85, "learning_rate": 4.788682657594319e-05, "loss": 2.4586, "step": 1109500 }, { "epoch": 0.85, "learning_rate": 4.788587582376294e-05, "loss": 2.4515, "step": 1110000 }, { "epoch": 0.85, "eval_accuracy": 0.5432032964139937, "eval_loss": 2.391061544418335, "eval_runtime": 9406.8304, "eval_samples_per_second": 29.233, "eval_steps_per_second": 7.308, "step": 1110000 }, { "epoch": 0.85, "learning_rate": 4.78849231662677e-05, "loss": 2.4367, "step": 1110500 }, { "epoch": 0.85, "learning_rate": 4.788397050877245e-05, "loss": 2.4391, "step": 1111000 }, { "epoch": 0.85, "learning_rate": 4.7883017851277215e-05, "loss": 2.4522, "step": 1111500 }, { "epoch": 0.85, "learning_rate": 4.7882065193781966e-05, "loss": 2.4586, "step": 1112000 }, { "epoch": 0.85, "learning_rate": 4.7881114441601714e-05, "loss": 2.4483, "step": 1112500 }, { "epoch": 0.85, "learning_rate": 4.788016178410647e-05, "loss": 2.4518, "step": 1113000 }, { "epoch": 0.85, "learning_rate": 4.787921103192623e-05, "loss": 2.4534, "step": 1113500 }, { "epoch": 0.85, "learning_rate": 4.787825837443098e-05, "loss": 2.4442, "step": 1114000 }, { "epoch": 0.85, "learning_rate": 4.787730571693574e-05, "loss": 2.4764, "step": 1114500 }, { "epoch": 0.85, "learning_rate": 4.7876353059440496e-05, "loss": 2.4609, "step": 1115000 }, { "epoch": 0.85, "learning_rate": 4.7875400401945254e-05, "loss": 2.4787, "step": 1115500 }, { "epoch": 0.85, "learning_rate": 4.787444774445001e-05, "loss": 2.4552, "step": 1116000 }, { "epoch": 0.85, "learning_rate": 4.7873496992269754e-05, "loss": 2.4662, "step": 1116500 }, { "epoch": 0.85, "learning_rate": 4.787254433477452e-05, "loss": 2.457, "step": 1117000 }, { "epoch": 0.85, "learning_rate": 4.787159167727928e-05, "loss": 2.4292, "step": 1117500 }, { "epoch": 0.85, "learning_rate": 4.787063901978403e-05, "loss": 2.4751, "step": 1118000 }, { "epoch": 0.85, "learning_rate": 4.786968636228879e-05, "loss": 2.4481, "step": 1118500 }, { "epoch": 0.85, "learning_rate": 4.786873561010854e-05, "loss": 2.4504, "step": 1119000 }, { "epoch": 0.85, "learning_rate": 4.7867782952613294e-05, "loss": 2.4468, "step": 1119500 }, { "epoch": 0.85, "learning_rate": 4.786683029511805e-05, "loss": 2.4582, "step": 1120000 }, { "epoch": 0.85, "learning_rate": 4.786587763762281e-05, "loss": 2.4614, "step": 1120500 }, { "epoch": 0.85, "learning_rate": 4.786492498012757e-05, "loss": 2.4775, "step": 1121000 }, { "epoch": 0.85, "learning_rate": 4.786397232263232e-05, "loss": 2.4417, "step": 1121500 }, { "epoch": 0.86, "learning_rate": 4.7863019665137085e-05, "loss": 2.4511, "step": 1122000 }, { "epoch": 0.86, "learning_rate": 4.786206700764184e-05, "loss": 2.462, "step": 1122500 }, { "epoch": 0.86, "learning_rate": 4.7861116255461585e-05, "loss": 2.4773, "step": 1123000 }, { "epoch": 0.86, "learning_rate": 4.786016550328133e-05, "loss": 2.4427, "step": 1123500 }, { "epoch": 0.86, "learning_rate": 4.785921284578609e-05, "loss": 2.4562, "step": 1124000 }, { "epoch": 0.86, "learning_rate": 4.785826018829085e-05, "loss": 2.4459, "step": 1124500 }, { "epoch": 0.86, "learning_rate": 4.785730753079561e-05, "loss": 2.4546, "step": 1125000 }, { "epoch": 0.86, "learning_rate": 4.7856354873300366e-05, "loss": 2.4492, "step": 1125500 }, { "epoch": 0.86, "learning_rate": 4.7855402215805124e-05, "loss": 2.4348, "step": 1126000 }, { "epoch": 0.86, "learning_rate": 4.785444955830988e-05, "loss": 2.4667, "step": 1126500 }, { "epoch": 0.86, "learning_rate": 4.7853496900814634e-05, "loss": 2.4562, "step": 1127000 }, { "epoch": 0.86, "learning_rate": 4.78525442433194e-05, "loss": 2.4696, "step": 1127500 }, { "epoch": 0.86, "learning_rate": 4.785159158582415e-05, "loss": 2.4712, "step": 1128000 }, { "epoch": 0.86, "learning_rate": 4.78506408336439e-05, "loss": 2.4574, "step": 1128500 }, { "epoch": 0.86, "learning_rate": 4.784968817614866e-05, "loss": 2.4461, "step": 1129000 }, { "epoch": 0.86, "learning_rate": 4.7848735518653416e-05, "loss": 2.4691, "step": 1129500 }, { "epoch": 0.86, "learning_rate": 4.7847782861158174e-05, "loss": 2.4558, "step": 1130000 }, { "epoch": 0.86, "learning_rate": 4.784683020366293e-05, "loss": 2.4493, "step": 1130500 }, { "epoch": 0.86, "learning_rate": 4.784587754616769e-05, "loss": 2.4758, "step": 1131000 }, { "epoch": 0.86, "learning_rate": 4.784492679398744e-05, "loss": 2.474, "step": 1131500 }, { "epoch": 0.86, "learning_rate": 4.78439741364922e-05, "loss": 2.4538, "step": 1132000 }, { "epoch": 0.86, "learning_rate": 4.7843021478996955e-05, "loss": 2.4499, "step": 1132500 }, { "epoch": 0.86, "learning_rate": 4.7842068821501714e-05, "loss": 2.4515, "step": 1133000 }, { "epoch": 0.86, "learning_rate": 4.7841116164006465e-05, "loss": 2.4718, "step": 1133500 }, { "epoch": 0.86, "learning_rate": 4.7840165411826214e-05, "loss": 2.4298, "step": 1134000 }, { "epoch": 0.86, "learning_rate": 4.783921275433098e-05, "loss": 2.47, "step": 1134500 }, { "epoch": 0.87, "learning_rate": 4.783826009683573e-05, "loss": 2.4324, "step": 1135000 }, { "epoch": 0.87, "learning_rate": 4.783730743934049e-05, "loss": 2.4586, "step": 1135500 }, { "epoch": 0.87, "learning_rate": 4.783635478184525e-05, "loss": 2.4645, "step": 1136000 }, { "epoch": 0.87, "learning_rate": 4.7835402124350005e-05, "loss": 2.4315, "step": 1136500 }, { "epoch": 0.87, "learning_rate": 4.783444946685476e-05, "loss": 2.4378, "step": 1137000 }, { "epoch": 0.87, "learning_rate": 4.7833496809359515e-05, "loss": 2.4169, "step": 1137500 }, { "epoch": 0.87, "learning_rate": 4.783254415186428e-05, "loss": 2.4285, "step": 1138000 }, { "epoch": 0.87, "learning_rate": 4.783159530499902e-05, "loss": 2.4878, "step": 1138500 }, { "epoch": 0.87, "learning_rate": 4.783064264750377e-05, "loss": 2.4743, "step": 1139000 }, { "epoch": 0.87, "learning_rate": 4.782968999000853e-05, "loss": 2.4721, "step": 1139500 }, { "epoch": 0.87, "learning_rate": 4.782873733251329e-05, "loss": 2.4267, "step": 1140000 }, { "epoch": 0.87, "eval_accuracy": 0.5439521534049061, "eval_loss": 2.385629415512085, "eval_runtime": 9413.2002, "eval_samples_per_second": 29.213, "eval_steps_per_second": 7.303, "step": 1140000 }, { "epoch": 0.87, "learning_rate": 4.7827784675018044e-05, "loss": 2.4325, "step": 1140500 }, { "epoch": 0.87, "learning_rate": 4.78268320175228e-05, "loss": 2.4674, "step": 1141000 }, { "epoch": 0.87, "learning_rate": 4.782587936002756e-05, "loss": 2.4581, "step": 1141500 }, { "epoch": 0.87, "learning_rate": 4.782492670253232e-05, "loss": 2.4251, "step": 1142000 }, { "epoch": 0.87, "learning_rate": 4.782397595035207e-05, "loss": 2.4928, "step": 1142500 }, { "epoch": 0.87, "learning_rate": 4.782302329285682e-05, "loss": 2.4455, "step": 1143000 }, { "epoch": 0.87, "learning_rate": 4.7822070635361584e-05, "loss": 2.4517, "step": 1143500 }, { "epoch": 0.87, "learning_rate": 4.7821117977866336e-05, "loss": 2.4544, "step": 1144000 }, { "epoch": 0.87, "learning_rate": 4.7820165320371094e-05, "loss": 2.4457, "step": 1144500 }, { "epoch": 0.87, "learning_rate": 4.781921456819084e-05, "loss": 2.4503, "step": 1145000 }, { "epoch": 0.87, "learning_rate": 4.78182638160106e-05, "loss": 2.4293, "step": 1145500 }, { "epoch": 0.87, "learning_rate": 4.781731115851535e-05, "loss": 2.4274, "step": 1146000 }, { "epoch": 0.87, "learning_rate": 4.781635850102011e-05, "loss": 2.4641, "step": 1146500 }, { "epoch": 0.87, "learning_rate": 4.7815407748839855e-05, "loss": 2.4524, "step": 1147000 }, { "epoch": 0.87, "learning_rate": 4.7814455091344614e-05, "loss": 2.4499, "step": 1147500 }, { "epoch": 0.87, "learning_rate": 4.781350243384937e-05, "loss": 2.4756, "step": 1148000 }, { "epoch": 0.88, "learning_rate": 4.7812549776354123e-05, "loss": 2.4268, "step": 1148500 }, { "epoch": 0.88, "learning_rate": 4.781159711885889e-05, "loss": 2.4259, "step": 1149000 }, { "epoch": 0.88, "learning_rate": 4.781064446136365e-05, "loss": 2.4636, "step": 1149500 }, { "epoch": 0.88, "learning_rate": 4.78096918038684e-05, "loss": 2.4621, "step": 1150000 }, { "epoch": 0.88, "learning_rate": 4.7808739146373163e-05, "loss": 2.4458, "step": 1150500 }, { "epoch": 0.88, "learning_rate": 4.7807786488877915e-05, "loss": 2.4375, "step": 1151000 }, { "epoch": 0.88, "learning_rate": 4.780683573669766e-05, "loss": 2.4406, "step": 1151500 }, { "epoch": 0.88, "learning_rate": 4.780588307920242e-05, "loss": 2.4774, "step": 1152000 }, { "epoch": 0.88, "learning_rate": 4.780493042170718e-05, "loss": 2.4625, "step": 1152500 }, { "epoch": 0.88, "learning_rate": 4.780397776421194e-05, "loss": 2.4336, "step": 1153000 }, { "epoch": 0.88, "learning_rate": 4.780302510671669e-05, "loss": 2.4659, "step": 1153500 }, { "epoch": 0.88, "learning_rate": 4.7802072449221455e-05, "loss": 2.483, "step": 1154000 }, { "epoch": 0.88, "learning_rate": 4.780111979172621e-05, "loss": 2.4625, "step": 1154500 }, { "epoch": 0.88, "learning_rate": 4.7800167134230964e-05, "loss": 2.4835, "step": 1155000 }, { "epoch": 0.88, "learning_rate": 4.779921638205071e-05, "loss": 2.4456, "step": 1155500 }, { "epoch": 0.88, "learning_rate": 4.779826372455548e-05, "loss": 2.4641, "step": 1156000 }, { "epoch": 0.88, "learning_rate": 4.779731106706023e-05, "loss": 2.4305, "step": 1156500 }, { "epoch": 0.88, "learning_rate": 4.779635840956499e-05, "loss": 2.4385, "step": 1157000 }, { "epoch": 0.88, "learning_rate": 4.7795405752069746e-05, "loss": 2.4703, "step": 1157500 }, { "epoch": 0.88, "learning_rate": 4.7794454999889494e-05, "loss": 2.4794, "step": 1158000 }, { "epoch": 0.88, "learning_rate": 4.779350234239425e-05, "loss": 2.4139, "step": 1158500 }, { "epoch": 0.88, "learning_rate": 4.7792549684899004e-05, "loss": 2.4586, "step": 1159000 }, { "epoch": 0.88, "learning_rate": 4.779159702740377e-05, "loss": 2.4661, "step": 1159500 }, { "epoch": 0.88, "learning_rate": 4.779064436990852e-05, "loss": 2.4846, "step": 1160000 }, { "epoch": 0.88, "learning_rate": 4.778969361772827e-05, "loss": 2.4414, "step": 1160500 }, { "epoch": 0.88, "learning_rate": 4.778874286554802e-05, "loss": 2.4364, "step": 1161000 }, { "epoch": 0.89, "learning_rate": 4.778779020805278e-05, "loss": 2.4949, "step": 1161500 }, { "epoch": 0.89, "learning_rate": 4.7786837550557534e-05, "loss": 2.46, "step": 1162000 }, { "epoch": 0.89, "learning_rate": 4.778588489306229e-05, "loss": 2.465, "step": 1162500 }, { "epoch": 0.89, "learning_rate": 4.778493223556705e-05, "loss": 2.4527, "step": 1163000 }, { "epoch": 0.89, "learning_rate": 4.77839814833868e-05, "loss": 2.4326, "step": 1163500 }, { "epoch": 0.89, "learning_rate": 4.778302882589156e-05, "loss": 2.4477, "step": 1164000 }, { "epoch": 0.89, "learning_rate": 4.778207616839631e-05, "loss": 2.46, "step": 1164500 }, { "epoch": 0.89, "learning_rate": 4.7781123510901073e-05, "loss": 2.4465, "step": 1165000 }, { "epoch": 0.89, "learning_rate": 4.778017085340583e-05, "loss": 2.4404, "step": 1165500 }, { "epoch": 0.89, "learning_rate": 4.777921819591058e-05, "loss": 2.452, "step": 1166000 }, { "epoch": 0.89, "learning_rate": 4.777826553841535e-05, "loss": 2.4442, "step": 1166500 }, { "epoch": 0.89, "learning_rate": 4.77773128809201e-05, "loss": 2.4324, "step": 1167000 }, { "epoch": 0.89, "learning_rate": 4.777636022342486e-05, "loss": 2.4679, "step": 1167500 }, { "epoch": 0.89, "learning_rate": 4.7775409471244606e-05, "loss": 2.4462, "step": 1168000 }, { "epoch": 0.89, "learning_rate": 4.7774456813749365e-05, "loss": 2.4241, "step": 1168500 }, { "epoch": 0.89, "learning_rate": 4.777350606156911e-05, "loss": 2.4454, "step": 1169000 }, { "epoch": 0.89, "learning_rate": 4.777255340407387e-05, "loss": 2.4423, "step": 1169500 }, { "epoch": 0.89, "learning_rate": 4.777160074657863e-05, "loss": 2.3964, "step": 1170000 }, { "epoch": 0.89, "eval_accuracy": 0.5446484994400751, "eval_loss": 2.3814730644226074, "eval_runtime": 9420.5632, "eval_samples_per_second": 29.19, "eval_steps_per_second": 7.298, "step": 1170000 }, { "epoch": 0.89, "learning_rate": 4.777064808908339e-05, "loss": 2.4406, "step": 1170500 }, { "epoch": 0.89, "learning_rate": 4.776969543158814e-05, "loss": 2.448, "step": 1171000 }, { "epoch": 0.89, "learning_rate": 4.77687427740929e-05, "loss": 2.4208, "step": 1171500 }, { "epoch": 0.89, "learning_rate": 4.776779011659766e-05, "loss": 2.4359, "step": 1172000 }, { "epoch": 0.89, "learning_rate": 4.7766839364417404e-05, "loss": 2.4596, "step": 1172500 }, { "epoch": 0.89, "learning_rate": 4.776588670692216e-05, "loss": 2.4564, "step": 1173000 }, { "epoch": 0.89, "learning_rate": 4.776493404942692e-05, "loss": 2.4353, "step": 1173500 }, { "epoch": 0.89, "learning_rate": 4.776398139193168e-05, "loss": 2.4617, "step": 1174000 }, { "epoch": 0.9, "learning_rate": 4.776302873443644e-05, "loss": 2.476, "step": 1174500 }, { "epoch": 0.9, "learning_rate": 4.7762076076941196e-05, "loss": 2.4436, "step": 1175000 }, { "epoch": 0.9, "learning_rate": 4.7761123419445954e-05, "loss": 2.4418, "step": 1175500 }, { "epoch": 0.9, "learning_rate": 4.776017076195071e-05, "loss": 2.4541, "step": 1176000 }, { "epoch": 0.9, "learning_rate": 4.7759220009770454e-05, "loss": 2.4424, "step": 1176500 }, { "epoch": 0.9, "learning_rate": 4.775826735227522e-05, "loss": 2.5097, "step": 1177000 }, { "epoch": 0.9, "learning_rate": 4.775731469477998e-05, "loss": 2.4514, "step": 1177500 }, { "epoch": 0.9, "learning_rate": 4.775636394259972e-05, "loss": 2.457, "step": 1178000 }, { "epoch": 0.9, "learning_rate": 4.775541128510448e-05, "loss": 2.4624, "step": 1178500 }, { "epoch": 0.9, "learning_rate": 4.7754458627609235e-05, "loss": 2.4465, "step": 1179000 }, { "epoch": 0.9, "learning_rate": 4.7753505970113993e-05, "loss": 2.4631, "step": 1179500 }, { "epoch": 0.9, "learning_rate": 4.775255521793374e-05, "loss": 2.454, "step": 1180000 }, { "epoch": 0.9, "learning_rate": 4.775160256043849e-05, "loss": 2.4103, "step": 1180500 }, { "epoch": 0.9, "learning_rate": 4.775064990294326e-05, "loss": 2.4276, "step": 1181000 }, { "epoch": 0.9, "learning_rate": 4.7749697245448017e-05, "loss": 2.459, "step": 1181500 }, { "epoch": 0.9, "learning_rate": 4.774874458795277e-05, "loss": 2.4622, "step": 1182000 }, { "epoch": 0.9, "learning_rate": 4.774779193045753e-05, "loss": 2.4372, "step": 1182500 }, { "epoch": 0.9, "learning_rate": 4.7746839272962285e-05, "loss": 2.4385, "step": 1183000 }, { "epoch": 0.9, "learning_rate": 4.774588661546704e-05, "loss": 2.4579, "step": 1183500 }, { "epoch": 0.9, "learning_rate": 4.774493586328679e-05, "loss": 2.405, "step": 1184000 }, { "epoch": 0.9, "learning_rate": 4.774398511110654e-05, "loss": 2.4625, "step": 1184500 }, { "epoch": 0.9, "learning_rate": 4.774303435892629e-05, "loss": 2.4594, "step": 1185000 }, { "epoch": 0.9, "learning_rate": 4.7742081701431046e-05, "loss": 2.4533, "step": 1185500 }, { "epoch": 0.9, "learning_rate": 4.7741129043935804e-05, "loss": 2.4499, "step": 1186000 }, { "epoch": 0.9, "learning_rate": 4.774017638644056e-05, "loss": 2.435, "step": 1186500 }, { "epoch": 0.9, "learning_rate": 4.773922372894532e-05, "loss": 2.4371, "step": 1187000 }, { "epoch": 0.91, "learning_rate": 4.773827107145007e-05, "loss": 2.4572, "step": 1187500 }, { "epoch": 0.91, "learning_rate": 4.773731841395484e-05, "loss": 2.4763, "step": 1188000 }, { "epoch": 0.91, "learning_rate": 4.773636575645959e-05, "loss": 2.4357, "step": 1188500 }, { "epoch": 0.91, "learning_rate": 4.773541309896435e-05, "loss": 2.4211, "step": 1189000 }, { "epoch": 0.91, "learning_rate": 4.7734460441469106e-05, "loss": 2.4481, "step": 1189500 }, { "epoch": 0.91, "learning_rate": 4.7733511594603844e-05, "loss": 2.4566, "step": 1190000 }, { "epoch": 0.91, "learning_rate": 4.77325589371086e-05, "loss": 2.4505, "step": 1190500 }, { "epoch": 0.91, "learning_rate": 4.773160627961336e-05, "loss": 2.4457, "step": 1191000 }, { "epoch": 0.91, "learning_rate": 4.773065362211812e-05, "loss": 2.4757, "step": 1191500 }, { "epoch": 0.91, "learning_rate": 4.772970096462288e-05, "loss": 2.4399, "step": 1192000 }, { "epoch": 0.91, "learning_rate": 4.7728748307127635e-05, "loss": 2.4767, "step": 1192500 }, { "epoch": 0.91, "learning_rate": 4.772779564963239e-05, "loss": 2.4568, "step": 1193000 }, { "epoch": 0.91, "learning_rate": 4.772684299213715e-05, "loss": 2.4412, "step": 1193500 }, { "epoch": 0.91, "learning_rate": 4.77258903346419e-05, "loss": 2.4723, "step": 1194000 }, { "epoch": 0.91, "learning_rate": 4.772493767714666e-05, "loss": 2.4569, "step": 1194500 }, { "epoch": 0.91, "learning_rate": 4.772398501965143e-05, "loss": 2.4732, "step": 1195000 }, { "epoch": 0.91, "learning_rate": 4.772303236215618e-05, "loss": 2.4534, "step": 1195500 }, { "epoch": 0.91, "learning_rate": 4.7722079704660937e-05, "loss": 2.453, "step": 1196000 }, { "epoch": 0.91, "learning_rate": 4.7721128952480685e-05, "loss": 2.4507, "step": 1196500 }, { "epoch": 0.91, "learning_rate": 4.772017629498544e-05, "loss": 2.4421, "step": 1197000 }, { "epoch": 0.91, "learning_rate": 4.77192236374902e-05, "loss": 2.4792, "step": 1197500 }, { "epoch": 0.91, "learning_rate": 4.771827288530994e-05, "loss": 2.433, "step": 1198000 }, { "epoch": 0.91, "learning_rate": 4.771732022781471e-05, "loss": 2.4545, "step": 1198500 }, { "epoch": 0.91, "learning_rate": 4.7716367570319466e-05, "loss": 2.4525, "step": 1199000 }, { "epoch": 0.91, "learning_rate": 4.771541681813921e-05, "loss": 2.4323, "step": 1199500 }, { "epoch": 0.91, "learning_rate": 4.7714464160643966e-05, "loss": 2.445, "step": 1200000 }, { "epoch": 0.91, "eval_accuracy": 0.5452781652350712, "eval_loss": 2.3779778480529785, "eval_runtime": 9418.4291, "eval_samples_per_second": 29.197, "eval_steps_per_second": 7.299, "step": 1200000 }, { "epoch": 0.91, "learning_rate": 4.771351150314873e-05, "loss": 2.4534, "step": 1200500 }, { "epoch": 0.92, "learning_rate": 4.771255884565348e-05, "loss": 2.4407, "step": 1201000 }, { "epoch": 0.92, "learning_rate": 4.771160618815824e-05, "loss": 2.4623, "step": 1201500 }, { "epoch": 0.92, "learning_rate": 4.7710653530663e-05, "loss": 2.4527, "step": 1202000 }, { "epoch": 0.92, "learning_rate": 4.770970087316776e-05, "loss": 2.4325, "step": 1202500 }, { "epoch": 0.92, "learning_rate": 4.7708748215672516e-05, "loss": 2.4457, "step": 1203000 }, { "epoch": 0.92, "learning_rate": 4.770779555817727e-05, "loss": 2.4407, "step": 1203500 }, { "epoch": 0.92, "learning_rate": 4.770684290068203e-05, "loss": 2.4487, "step": 1204000 }, { "epoch": 0.92, "learning_rate": 4.7705890243186784e-05, "loss": 2.415, "step": 1204500 }, { "epoch": 0.92, "learning_rate": 4.770493758569154e-05, "loss": 2.4298, "step": 1205000 }, { "epoch": 0.92, "learning_rate": 4.77039849281963e-05, "loss": 2.4653, "step": 1205500 }, { "epoch": 0.92, "learning_rate": 4.770303417601605e-05, "loss": 2.4568, "step": 1206000 }, { "epoch": 0.92, "learning_rate": 4.770208151852081e-05, "loss": 2.4529, "step": 1206500 }, { "epoch": 0.92, "learning_rate": 4.7701130766340555e-05, "loss": 2.4402, "step": 1207000 }, { "epoch": 0.92, "learning_rate": 4.7700178108845314e-05, "loss": 2.4254, "step": 1207500 }, { "epoch": 0.92, "learning_rate": 4.769922545135007e-05, "loss": 2.4466, "step": 1208000 }, { "epoch": 0.92, "learning_rate": 4.769827279385482e-05, "loss": 2.438, "step": 1208500 }, { "epoch": 0.92, "learning_rate": 4.769732013635959e-05, "loss": 2.455, "step": 1209000 }, { "epoch": 0.92, "learning_rate": 4.769636938417934e-05, "loss": 2.4445, "step": 1209500 }, { "epoch": 0.92, "learning_rate": 4.769541672668409e-05, "loss": 2.4321, "step": 1210000 }, { "epoch": 0.92, "learning_rate": 4.7694464069188846e-05, "loss": 2.4361, "step": 1210500 }, { "epoch": 0.92, "learning_rate": 4.769351141169361e-05, "loss": 2.4632, "step": 1211000 }, { "epoch": 0.92, "learning_rate": 4.769255875419836e-05, "loss": 2.4711, "step": 1211500 }, { "epoch": 0.92, "learning_rate": 4.769160800201811e-05, "loss": 2.4287, "step": 1212000 }, { "epoch": 0.92, "learning_rate": 4.769065534452287e-05, "loss": 2.4288, "step": 1212500 }, { "epoch": 0.92, "learning_rate": 4.768970268702763e-05, "loss": 2.4304, "step": 1213000 }, { "epoch": 0.92, "learning_rate": 4.7688750029532386e-05, "loss": 2.4764, "step": 1213500 }, { "epoch": 0.93, "learning_rate": 4.768779737203714e-05, "loss": 2.4436, "step": 1214000 }, { "epoch": 0.93, "learning_rate": 4.76868447145419e-05, "loss": 2.4726, "step": 1214500 }, { "epoch": 0.93, "learning_rate": 4.7685892057046654e-05, "loss": 2.4391, "step": 1215000 }, { "epoch": 0.93, "learning_rate": 4.76849413048664e-05, "loss": 2.4477, "step": 1215500 }, { "epoch": 0.93, "learning_rate": 4.768398864737116e-05, "loss": 2.4675, "step": 1216000 }, { "epoch": 0.93, "learning_rate": 4.768303598987592e-05, "loss": 2.4406, "step": 1216500 }, { "epoch": 0.93, "learning_rate": 4.768208523769567e-05, "loss": 2.4212, "step": 1217000 }, { "epoch": 0.93, "learning_rate": 4.7681132580200426e-05, "loss": 2.4576, "step": 1217500 }, { "epoch": 0.93, "learning_rate": 4.7680179922705184e-05, "loss": 2.4724, "step": 1218000 }, { "epoch": 0.93, "learning_rate": 4.767922726520994e-05, "loss": 2.4229, "step": 1218500 }, { "epoch": 0.93, "learning_rate": 4.76782746077147e-05, "loss": 2.448, "step": 1219000 }, { "epoch": 0.93, "learning_rate": 4.767732195021946e-05, "loss": 2.4539, "step": 1219500 }, { "epoch": 0.93, "learning_rate": 4.767636929272422e-05, "loss": 2.4208, "step": 1220000 }, { "epoch": 0.93, "learning_rate": 4.767541663522897e-05, "loss": 2.4288, "step": 1220500 }, { "epoch": 0.93, "learning_rate": 4.767446397773373e-05, "loss": 2.4625, "step": 1221000 }, { "epoch": 0.93, "learning_rate": 4.7673511320238485e-05, "loss": 2.4565, "step": 1221500 }, { "epoch": 0.93, "learning_rate": 4.7672558662743244e-05, "loss": 2.454, "step": 1222000 }, { "epoch": 0.93, "learning_rate": 4.7671606005248e-05, "loss": 2.4292, "step": 1222500 }, { "epoch": 0.93, "learning_rate": 4.767065334775276e-05, "loss": 2.4394, "step": 1223000 }, { "epoch": 0.93, "learning_rate": 4.766970259557251e-05, "loss": 2.4593, "step": 1223500 }, { "epoch": 0.93, "learning_rate": 4.766874993807727e-05, "loss": 2.4652, "step": 1224000 }, { "epoch": 0.93, "learning_rate": 4.7667797280582025e-05, "loss": 2.4719, "step": 1224500 }, { "epoch": 0.93, "learning_rate": 4.766684462308678e-05, "loss": 2.4309, "step": 1225000 }, { "epoch": 0.93, "learning_rate": 4.766589387090653e-05, "loss": 2.4518, "step": 1225500 }, { "epoch": 0.93, "learning_rate": 4.766494121341128e-05, "loss": 2.4464, "step": 1226000 }, { "epoch": 0.93, "learning_rate": 4.766398855591605e-05, "loss": 2.4406, "step": 1226500 }, { "epoch": 0.94, "learning_rate": 4.76630358984208e-05, "loss": 2.4466, "step": 1227000 }, { "epoch": 0.94, "learning_rate": 4.766208514624055e-05, "loss": 2.4317, "step": 1227500 }, { "epoch": 0.94, "learning_rate": 4.7661132488745306e-05, "loss": 2.4657, "step": 1228000 }, { "epoch": 0.94, "learning_rate": 4.7660179831250064e-05, "loss": 2.4223, "step": 1228500 }, { "epoch": 0.94, "learning_rate": 4.765922717375482e-05, "loss": 2.4664, "step": 1229000 }, { "epoch": 0.94, "learning_rate": 4.7658274516259574e-05, "loss": 2.4872, "step": 1229500 }, { "epoch": 0.94, "learning_rate": 4.765732376407932e-05, "loss": 2.426, "step": 1230000 }, { "epoch": 0.94, "eval_accuracy": 0.545539682023057, "eval_loss": 2.3749537467956543, "eval_runtime": 9421.9591, "eval_samples_per_second": 29.186, "eval_steps_per_second": 7.296, "step": 1230000 }, { "epoch": 0.94, "learning_rate": 4.765637110658409e-05, "loss": 2.4645, "step": 1230500 }, { "epoch": 0.94, "learning_rate": 4.765541844908884e-05, "loss": 2.4384, "step": 1231000 }, { "epoch": 0.94, "learning_rate": 4.76544657915936e-05, "loss": 2.4337, "step": 1231500 }, { "epoch": 0.94, "learning_rate": 4.765351313409836e-05, "loss": 2.4517, "step": 1232000 }, { "epoch": 0.94, "learning_rate": 4.7652560476603114e-05, "loss": 2.44, "step": 1232500 }, { "epoch": 0.94, "learning_rate": 4.765160781910787e-05, "loss": 2.4386, "step": 1233000 }, { "epoch": 0.94, "learning_rate": 4.765065706692762e-05, "loss": 2.4378, "step": 1233500 }, { "epoch": 0.94, "learning_rate": 4.764970440943238e-05, "loss": 2.4887, "step": 1234000 }, { "epoch": 0.94, "learning_rate": 4.764875175193714e-05, "loss": 2.4659, "step": 1234500 }, { "epoch": 0.94, "learning_rate": 4.764779909444189e-05, "loss": 2.424, "step": 1235000 }, { "epoch": 0.94, "learning_rate": 4.7646848342261644e-05, "loss": 2.4355, "step": 1235500 }, { "epoch": 0.94, "learning_rate": 4.76458956847664e-05, "loss": 2.4244, "step": 1236000 }, { "epoch": 0.94, "learning_rate": 4.7644943027271154e-05, "loss": 2.3749, "step": 1236500 }, { "epoch": 0.94, "learning_rate": 4.764399036977591e-05, "loss": 2.4463, "step": 1237000 }, { "epoch": 0.94, "learning_rate": 4.764303771228067e-05, "loss": 2.4514, "step": 1237500 }, { "epoch": 0.94, "learning_rate": 4.764208505478543e-05, "loss": 2.4449, "step": 1238000 }, { "epoch": 0.94, "learning_rate": 4.764113430260518e-05, "loss": 2.4579, "step": 1238500 }, { "epoch": 0.94, "learning_rate": 4.7640181645109935e-05, "loss": 2.4395, "step": 1239000 }, { "epoch": 0.94, "learning_rate": 4.763922898761469e-05, "loss": 2.4286, "step": 1239500 }, { "epoch": 0.95, "learning_rate": 4.763827633011945e-05, "loss": 2.4426, "step": 1240000 }, { "epoch": 0.95, "learning_rate": 4.763732367262421e-05, "loss": 2.4494, "step": 1240500 }, { "epoch": 0.95, "learning_rate": 4.763637101512897e-05, "loss": 2.4401, "step": 1241000 }, { "epoch": 0.95, "learning_rate": 4.763541835763372e-05, "loss": 2.4457, "step": 1241500 }, { "epoch": 0.95, "learning_rate": 4.763446570013848e-05, "loss": 2.4725, "step": 1242000 }, { "epoch": 0.95, "learning_rate": 4.763351494795823e-05, "loss": 2.4496, "step": 1242500 }, { "epoch": 0.95, "learning_rate": 4.763256419577798e-05, "loss": 2.4385, "step": 1243000 }, { "epoch": 0.95, "learning_rate": 4.763161153828273e-05, "loss": 2.4381, "step": 1243500 }, { "epoch": 0.95, "learning_rate": 4.763065888078749e-05, "loss": 2.4714, "step": 1244000 }, { "epoch": 0.95, "learning_rate": 4.762970622329225e-05, "loss": 2.4348, "step": 1244500 }, { "epoch": 0.95, "learning_rate": 4.762875356579701e-05, "loss": 2.4444, "step": 1245000 }, { "epoch": 0.95, "learning_rate": 4.7627804718931746e-05, "loss": 2.4591, "step": 1245500 }, { "epoch": 0.95, "learning_rate": 4.7626852061436504e-05, "loss": 2.4604, "step": 1246000 }, { "epoch": 0.95, "learning_rate": 4.762589940394126e-05, "loss": 2.4265, "step": 1246500 }, { "epoch": 0.95, "learning_rate": 4.762494674644602e-05, "loss": 2.4375, "step": 1247000 }, { "epoch": 0.95, "learning_rate": 4.762399408895077e-05, "loss": 2.4498, "step": 1247500 }, { "epoch": 0.95, "learning_rate": 4.762304143145554e-05, "loss": 2.4491, "step": 1248000 }, { "epoch": 0.95, "learning_rate": 4.762208877396029e-05, "loss": 2.4241, "step": 1248500 }, { "epoch": 0.95, "learning_rate": 4.762113611646505e-05, "loss": 2.4576, "step": 1249000 }, { "epoch": 0.95, "learning_rate": 4.7620183458969805e-05, "loss": 2.4299, "step": 1249500 }, { "epoch": 0.95, "learning_rate": 4.7619230801474564e-05, "loss": 2.4504, "step": 1250000 }, { "epoch": 0.95, "learning_rate": 4.761827814397932e-05, "loss": 2.477, "step": 1250500 }, { "epoch": 0.95, "learning_rate": 4.7617325486484074e-05, "loss": 2.4614, "step": 1251000 }, { "epoch": 0.95, "learning_rate": 4.761637282898884e-05, "loss": 2.4511, "step": 1251500 }, { "epoch": 0.95, "learning_rate": 4.761542207680859e-05, "loss": 2.4488, "step": 1252000 }, { "epoch": 0.95, "learning_rate": 4.7614471324628335e-05, "loss": 2.4652, "step": 1252500 }, { "epoch": 0.95, "learning_rate": 4.761351866713309e-05, "loss": 2.445, "step": 1253000 }, { "epoch": 0.96, "learning_rate": 4.761256600963785e-05, "loss": 2.45, "step": 1253500 }, { "epoch": 0.96, "learning_rate": 4.76116133521426e-05, "loss": 2.4297, "step": 1254000 }, { "epoch": 0.96, "learning_rate": 4.761066069464736e-05, "loss": 2.4438, "step": 1254500 }, { "epoch": 0.96, "learning_rate": 4.760970994246711e-05, "loss": 2.4604, "step": 1255000 }, { "epoch": 0.96, "learning_rate": 4.760875728497187e-05, "loss": 2.4621, "step": 1255500 }, { "epoch": 0.96, "learning_rate": 4.7607804627476626e-05, "loss": 2.4478, "step": 1256000 }, { "epoch": 0.96, "learning_rate": 4.760685196998138e-05, "loss": 2.4338, "step": 1256500 }, { "epoch": 0.96, "learning_rate": 4.760589931248614e-05, "loss": 2.4517, "step": 1257000 }, { "epoch": 0.96, "learning_rate": 4.760494856030589e-05, "loss": 2.4592, "step": 1257500 }, { "epoch": 0.96, "learning_rate": 4.760399590281064e-05, "loss": 2.4118, "step": 1258000 }, { "epoch": 0.96, "learning_rate": 4.76030432453154e-05, "loss": 2.4349, "step": 1258500 }, { "epoch": 0.96, "learning_rate": 4.7602090587820166e-05, "loss": 2.443, "step": 1259000 }, { "epoch": 0.96, "learning_rate": 4.760113793032492e-05, "loss": 2.4299, "step": 1259500 }, { "epoch": 0.96, "learning_rate": 4.7600185272829676e-05, "loss": 2.4209, "step": 1260000 }, { "epoch": 0.96, "eval_accuracy": 0.5460106632076834, "eval_loss": 2.3717141151428223, "eval_runtime": 9429.812, "eval_samples_per_second": 29.162, "eval_steps_per_second": 7.29, "step": 1260000 }, { "epoch": 0.96, "learning_rate": 4.7599232615334434e-05, "loss": 2.4372, "step": 1260500 }, { "epoch": 0.96, "learning_rate": 4.759827995783919e-05, "loss": 2.4361, "step": 1261000 }, { "epoch": 0.96, "learning_rate": 4.759732730034395e-05, "loss": 2.4553, "step": 1261500 }, { "epoch": 0.96, "learning_rate": 4.75963765481637e-05, "loss": 2.4358, "step": 1262000 }, { "epoch": 0.96, "learning_rate": 4.759542389066846e-05, "loss": 2.4493, "step": 1262500 }, { "epoch": 0.96, "learning_rate": 4.7594473138488206e-05, "loss": 2.4218, "step": 1263000 }, { "epoch": 0.96, "learning_rate": 4.759352048099296e-05, "loss": 2.4555, "step": 1263500 }, { "epoch": 0.96, "learning_rate": 4.759256782349772e-05, "loss": 2.462, "step": 1264000 }, { "epoch": 0.96, "learning_rate": 4.7591615166002474e-05, "loss": 2.4469, "step": 1264500 }, { "epoch": 0.96, "learning_rate": 4.759066250850723e-05, "loss": 2.453, "step": 1265000 }, { "epoch": 0.96, "learning_rate": 4.758970985101199e-05, "loss": 2.4316, "step": 1265500 }, { "epoch": 0.96, "learning_rate": 4.758875719351675e-05, "loss": 2.4283, "step": 1266000 }, { "epoch": 0.97, "learning_rate": 4.75878064413365e-05, "loss": 2.4142, "step": 1266500 }, { "epoch": 0.97, "learning_rate": 4.7586853783841255e-05, "loss": 2.4259, "step": 1267000 }, { "epoch": 0.97, "learning_rate": 4.7585901126346013e-05, "loss": 2.4472, "step": 1267500 }, { "epoch": 0.97, "learning_rate": 4.758494846885077e-05, "loss": 2.4415, "step": 1268000 }, { "epoch": 0.97, "learning_rate": 4.758399771667052e-05, "loss": 2.4481, "step": 1268500 }, { "epoch": 0.97, "learning_rate": 4.758304505917527e-05, "loss": 2.4556, "step": 1269000 }, { "epoch": 0.97, "learning_rate": 4.7582092401680037e-05, "loss": 2.4314, "step": 1269500 }, { "epoch": 0.97, "learning_rate": 4.758113974418479e-05, "loss": 2.4641, "step": 1270000 }, { "epoch": 0.97, "learning_rate": 4.7580187086689546e-05, "loss": 2.4439, "step": 1270500 }, { "epoch": 0.97, "learning_rate": 4.757923442919431e-05, "loss": 2.4514, "step": 1271000 }, { "epoch": 0.97, "learning_rate": 4.757828177169906e-05, "loss": 2.4243, "step": 1271500 }, { "epoch": 0.97, "learning_rate": 4.757732911420382e-05, "loss": 2.4347, "step": 1272000 }, { "epoch": 0.97, "learning_rate": 4.757637836202357e-05, "loss": 2.448, "step": 1272500 }, { "epoch": 0.97, "learning_rate": 4.757542570452833e-05, "loss": 2.4335, "step": 1273000 }, { "epoch": 0.97, "learning_rate": 4.7574474952348076e-05, "loss": 2.4376, "step": 1273500 }, { "epoch": 0.97, "learning_rate": 4.757352229485283e-05, "loss": 2.4371, "step": 1274000 }, { "epoch": 0.97, "learning_rate": 4.7572569637357586e-05, "loss": 2.4221, "step": 1274500 }, { "epoch": 0.97, "learning_rate": 4.757161697986235e-05, "loss": 2.429, "step": 1275000 }, { "epoch": 0.97, "learning_rate": 4.75706643223671e-05, "loss": 2.4395, "step": 1275500 }, { "epoch": 0.97, "learning_rate": 4.756971166487186e-05, "loss": 2.4272, "step": 1276000 }, { "epoch": 0.97, "learning_rate": 4.756876091269161e-05, "loss": 2.4779, "step": 1276500 }, { "epoch": 0.97, "learning_rate": 4.756780825519637e-05, "loss": 2.4485, "step": 1277000 }, { "epoch": 0.97, "learning_rate": 4.7566855597701126e-05, "loss": 2.4643, "step": 1277500 }, { "epoch": 0.97, "learning_rate": 4.7565902940205884e-05, "loss": 2.424, "step": 1278000 }, { "epoch": 0.97, "learning_rate": 4.756495028271064e-05, "loss": 2.4251, "step": 1278500 }, { "epoch": 0.97, "learning_rate": 4.756399953053039e-05, "loss": 2.4298, "step": 1279000 }, { "epoch": 0.98, "learning_rate": 4.756304687303514e-05, "loss": 2.4454, "step": 1279500 }, { "epoch": 0.98, "learning_rate": 4.756209421553991e-05, "loss": 2.4731, "step": 1280000 }, { "epoch": 0.98, "learning_rate": 4.7561141558044665e-05, "loss": 2.3861, "step": 1280500 }, { "epoch": 0.98, "learning_rate": 4.756018890054942e-05, "loss": 2.4305, "step": 1281000 }, { "epoch": 0.98, "learning_rate": 4.7559236243054175e-05, "loss": 2.4326, "step": 1281500 }, { "epoch": 0.98, "learning_rate": 4.7558283585558933e-05, "loss": 2.4333, "step": 1282000 }, { "epoch": 0.98, "learning_rate": 4.755733092806369e-05, "loss": 2.4663, "step": 1282500 }, { "epoch": 0.98, "learning_rate": 4.755637827056845e-05, "loss": 2.4284, "step": 1283000 }, { "epoch": 0.98, "learning_rate": 4.75554275183882e-05, "loss": 2.4206, "step": 1283500 }, { "epoch": 0.98, "learning_rate": 4.7554474860892957e-05, "loss": 2.4531, "step": 1284000 }, { "epoch": 0.98, "learning_rate": 4.755352220339771e-05, "loss": 2.4535, "step": 1284500 }, { "epoch": 0.98, "learning_rate": 4.755256954590247e-05, "loss": 2.4424, "step": 1285000 }, { "epoch": 0.98, "learning_rate": 4.755161688840723e-05, "loss": 2.4311, "step": 1285500 }, { "epoch": 0.98, "learning_rate": 4.755066423091198e-05, "loss": 2.443, "step": 1286000 }, { "epoch": 0.98, "learning_rate": 4.754971157341674e-05, "loss": 2.4266, "step": 1286500 }, { "epoch": 0.98, "learning_rate": 4.7548760821236496e-05, "loss": 2.447, "step": 1287000 }, { "epoch": 0.98, "learning_rate": 4.754780816374125e-05, "loss": 2.4502, "step": 1287500 }, { "epoch": 0.98, "learning_rate": 4.7546855506246006e-05, "loss": 2.4308, "step": 1288000 }, { "epoch": 0.98, "learning_rate": 4.7545902848750764e-05, "loss": 2.438, "step": 1288500 }, { "epoch": 0.98, "learning_rate": 4.754495019125552e-05, "loss": 2.4328, "step": 1289000 }, { "epoch": 0.98, "learning_rate": 4.754399943907527e-05, "loss": 2.4774, "step": 1289500 }, { "epoch": 0.98, "learning_rate": 4.754304868689502e-05, "loss": 2.4336, "step": 1290000 }, { "epoch": 0.98, "eval_accuracy": 0.5466836224585419, "eval_loss": 2.367841958999634, "eval_runtime": 9409.8624, "eval_samples_per_second": 29.223, "eval_steps_per_second": 7.306, "step": 1290000 }, { "epoch": 0.98, "learning_rate": 4.754209602939978e-05, "loss": 2.4576, "step": 1290500 }, { "epoch": 0.98, "learning_rate": 4.7541143371904536e-05, "loss": 2.4288, "step": 1291000 }, { "epoch": 0.98, "learning_rate": 4.754019071440929e-05, "loss": 2.4196, "step": 1291500 }, { "epoch": 0.98, "learning_rate": 4.7539238056914046e-05, "loss": 2.4701, "step": 1292000 }, { "epoch": 0.99, "learning_rate": 4.7538285399418804e-05, "loss": 2.3882, "step": 1292500 }, { "epoch": 0.99, "learning_rate": 4.753733274192356e-05, "loss": 2.4307, "step": 1293000 }, { "epoch": 0.99, "learning_rate": 4.753638008442832e-05, "loss": 2.4462, "step": 1293500 }, { "epoch": 0.99, "learning_rate": 4.753542933224807e-05, "loss": 2.4597, "step": 1294000 }, { "epoch": 0.99, "learning_rate": 4.753447667475283e-05, "loss": 2.4105, "step": 1294500 }, { "epoch": 0.99, "learning_rate": 4.7533524017257585e-05, "loss": 2.4247, "step": 1295000 }, { "epoch": 0.99, "learning_rate": 4.753257135976234e-05, "loss": 2.4193, "step": 1295500 }, { "epoch": 0.99, "learning_rate": 4.75316187022671e-05, "loss": 2.4141, "step": 1296000 }, { "epoch": 0.99, "learning_rate": 4.753066604477185e-05, "loss": 2.4735, "step": 1296500 }, { "epoch": 0.99, "learning_rate": 4.752971338727661e-05, "loss": 2.4149, "step": 1297000 }, { "epoch": 0.99, "learning_rate": 4.752876072978137e-05, "loss": 2.4019, "step": 1297500 }, { "epoch": 0.99, "learning_rate": 4.752780997760112e-05, "loss": 2.4309, "step": 1298000 }, { "epoch": 0.99, "learning_rate": 4.7526857320105877e-05, "loss": 2.4284, "step": 1298500 }, { "epoch": 0.99, "learning_rate": 4.7525904662610635e-05, "loss": 2.4322, "step": 1299000 }, { "epoch": 0.99, "learning_rate": 4.752495200511539e-05, "loss": 2.4123, "step": 1299500 }, { "epoch": 0.99, "learning_rate": 4.752400125293514e-05, "loss": 2.4198, "step": 1300000 }, { "epoch": 0.99, "learning_rate": 4.752304859543989e-05, "loss": 2.4349, "step": 1300500 }, { "epoch": 0.99, "learning_rate": 4.752209593794466e-05, "loss": 2.4353, "step": 1301000 }, { "epoch": 0.99, "learning_rate": 4.7521143280449416e-05, "loss": 2.44, "step": 1301500 }, { "epoch": 0.99, "learning_rate": 4.752019252826916e-05, "loss": 2.4681, "step": 1302000 }, { "epoch": 0.99, "learning_rate": 4.7519241776088906e-05, "loss": 2.4758, "step": 1302500 }, { "epoch": 0.99, "learning_rate": 4.7518289118593664e-05, "loss": 2.4167, "step": 1303000 }, { "epoch": 0.99, "learning_rate": 4.751733646109842e-05, "loss": 2.43, "step": 1303500 }, { "epoch": 0.99, "learning_rate": 4.751638380360318e-05, "loss": 2.4617, "step": 1304000 }, { "epoch": 0.99, "learning_rate": 4.751543114610794e-05, "loss": 2.38, "step": 1304500 }, { "epoch": 0.99, "learning_rate": 4.75144784886127e-05, "loss": 2.4408, "step": 1305000 }, { "epoch": 0.99, "learning_rate": 4.7513527736432446e-05, "loss": 2.4388, "step": 1305500 }, { "epoch": 1.0, "learning_rate": 4.7512575078937204e-05, "loss": 2.4548, "step": 1306000 }, { "epoch": 1.0, "learning_rate": 4.751162242144196e-05, "loss": 2.4251, "step": 1306500 }, { "epoch": 1.0, "learning_rate": 4.751066976394672e-05, "loss": 2.4523, "step": 1307000 }, { "epoch": 1.0, "learning_rate": 4.750971710645147e-05, "loss": 2.4168, "step": 1307500 }, { "epoch": 1.0, "learning_rate": 4.750876444895623e-05, "loss": 2.4403, "step": 1308000 }, { "epoch": 1.0, "learning_rate": 4.750781179146099e-05, "loss": 2.4531, "step": 1308500 }, { "epoch": 1.0, "learning_rate": 4.750686103928074e-05, "loss": 2.4373, "step": 1309000 }, { "epoch": 1.0, "learning_rate": 4.7505908381785495e-05, "loss": 2.4364, "step": 1309500 }, { "epoch": 1.0, "learning_rate": 4.7504955724290254e-05, "loss": 2.4515, "step": 1310000 }, { "epoch": 1.0, "learning_rate": 4.750400306679501e-05, "loss": 2.446, "step": 1310500 }, { "epoch": 1.0, "learning_rate": 4.750305040929977e-05, "loss": 2.4509, "step": 1311000 }, { "epoch": 1.0, "learning_rate": 4.750209775180453e-05, "loss": 2.4453, "step": 1311500 }, { "epoch": 1.0, "learning_rate": 4.750114509430929e-05, "loss": 2.4448, "step": 1312000 } ], "max_steps": 26242380, "num_train_epochs": 20, "total_flos": 9.748521040622911e+18, "trial_name": null, "trial_params": null }