{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9996485441617778, "eval_steps": 500, "global_step": 2311, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004325610316580605, "grad_norm": 10.074225425720215, "learning_rate": 8.000000000000001e-07, "loss": 3.4502, "step": 1 }, { "epoch": 0.000865122063316121, "grad_norm": 10.193795204162598, "learning_rate": 1.6000000000000001e-06, "loss": 3.4309, "step": 2 }, { "epoch": 0.0012976830949741815, "grad_norm": 9.579278945922852, "learning_rate": 2.4000000000000003e-06, "loss": 3.4075, "step": 3 }, { "epoch": 0.001730244126632242, "grad_norm": 9.713829040527344, "learning_rate": 3.2000000000000003e-06, "loss": 3.4737, "step": 4 }, { "epoch": 0.0021628051582903026, "grad_norm": 9.49286937713623, "learning_rate": 4.000000000000001e-06, "loss": 3.4149, "step": 5 }, { "epoch": 0.002595366189948363, "grad_norm": 10.680686950683594, "learning_rate": 4.800000000000001e-06, "loss": 3.4793, "step": 6 }, { "epoch": 0.0030279272216064235, "grad_norm": 10.119192123413086, "learning_rate": 5.600000000000001e-06, "loss": 3.3707, "step": 7 }, { "epoch": 0.003460488253264484, "grad_norm": 10.217940330505371, "learning_rate": 6.4000000000000006e-06, "loss": 3.4339, "step": 8 }, { "epoch": 0.0038930492849225447, "grad_norm": 10.2111234664917, "learning_rate": 7.2000000000000005e-06, "loss": 3.3944, "step": 9 }, { "epoch": 0.004325610316580605, "grad_norm": 9.485647201538086, "learning_rate": 8.000000000000001e-06, "loss": 3.3145, "step": 10 }, { "epoch": 0.004758171348238666, "grad_norm": 10.536123275756836, "learning_rate": 8.8e-06, "loss": 3.37, "step": 11 }, { "epoch": 0.005190732379896726, "grad_norm": 10.602726936340332, "learning_rate": 9.600000000000001e-06, "loss": 3.4304, "step": 12 }, { "epoch": 0.0056232934115547865, "grad_norm": 10.237251281738281, "learning_rate": 1.04e-05, "loss": 3.3628, "step": 13 }, { "epoch": 0.006055854443212847, "grad_norm": 10.820761680603027, "learning_rate": 1.1200000000000001e-05, "loss": 3.4281, "step": 14 }, { "epoch": 0.006488415474870907, "grad_norm": 10.799651145935059, "learning_rate": 1.2e-05, "loss": 3.4524, "step": 15 }, { "epoch": 0.006920976506528968, "grad_norm": 11.567938804626465, "learning_rate": 1.2800000000000001e-05, "loss": 3.4891, "step": 16 }, { "epoch": 0.007353537538187028, "grad_norm": 11.040711402893066, "learning_rate": 1.3600000000000002e-05, "loss": 3.3775, "step": 17 }, { "epoch": 0.0077860985698450895, "grad_norm": 10.833939552307129, "learning_rate": 1.4400000000000001e-05, "loss": 3.4044, "step": 18 }, { "epoch": 0.008218659601503149, "grad_norm": 11.41971206665039, "learning_rate": 1.5200000000000002e-05, "loss": 3.4054, "step": 19 }, { "epoch": 0.00865122063316121, "grad_norm": 11.41285514831543, "learning_rate": 1.6000000000000003e-05, "loss": 3.2957, "step": 20 }, { "epoch": 0.00908378166481927, "grad_norm": 11.056841850280762, "learning_rate": 1.6800000000000002e-05, "loss": 3.3388, "step": 21 }, { "epoch": 0.009516342696477331, "grad_norm": 11.64125919342041, "learning_rate": 1.76e-05, "loss": 3.3148, "step": 22 }, { "epoch": 0.00994890372813539, "grad_norm": 11.749085426330566, "learning_rate": 1.8400000000000003e-05, "loss": 3.3344, "step": 23 }, { "epoch": 0.010381464759793452, "grad_norm": 10.946850776672363, "learning_rate": 1.9200000000000003e-05, "loss": 3.3136, "step": 24 }, { "epoch": 0.010814025791451513, "grad_norm": 11.453843116760254, "learning_rate": 2e-05, "loss": 3.2952, "step": 25 }, { "epoch": 0.011246586823109573, "grad_norm": 10.720843315124512, "learning_rate": 1.99912510936133e-05, "loss": 3.3089, "step": 26 }, { "epoch": 0.011679147854767634, "grad_norm": 10.230990409851074, "learning_rate": 1.9982502187226597e-05, "loss": 3.1701, "step": 27 }, { "epoch": 0.012111708886425694, "grad_norm": 10.450053215026855, "learning_rate": 1.9973753280839896e-05, "loss": 3.3022, "step": 28 }, { "epoch": 0.012544269918083755, "grad_norm": 9.970443725585938, "learning_rate": 1.9965004374453195e-05, "loss": 3.1913, "step": 29 }, { "epoch": 0.012976830949741815, "grad_norm": 8.793647766113281, "learning_rate": 1.9956255468066494e-05, "loss": 3.1559, "step": 30 }, { "epoch": 0.013409391981399876, "grad_norm": 9.256972312927246, "learning_rate": 1.9947506561679793e-05, "loss": 3.0502, "step": 31 }, { "epoch": 0.013841953013057936, "grad_norm": 9.748438835144043, "learning_rate": 1.993875765529309e-05, "loss": 3.1057, "step": 32 }, { "epoch": 0.014274514044715997, "grad_norm": 9.952435493469238, "learning_rate": 1.993000874890639e-05, "loss": 3.0494, "step": 33 }, { "epoch": 0.014707075076374056, "grad_norm": 9.690461158752441, "learning_rate": 1.9921259842519688e-05, "loss": 3.0637, "step": 34 }, { "epoch": 0.015139636108032118, "grad_norm": 9.847665786743164, "learning_rate": 1.9912510936132984e-05, "loss": 3.039, "step": 35 }, { "epoch": 0.015572197139690179, "grad_norm": 8.799947738647461, "learning_rate": 1.9903762029746283e-05, "loss": 3.0492, "step": 36 }, { "epoch": 0.01600475817134824, "grad_norm": 9.004206657409668, "learning_rate": 1.9895013123359582e-05, "loss": 2.9891, "step": 37 }, { "epoch": 0.016437319203006298, "grad_norm": 9.604561805725098, "learning_rate": 1.9886264216972878e-05, "loss": 2.9614, "step": 38 }, { "epoch": 0.01686988023466436, "grad_norm": 9.404193878173828, "learning_rate": 1.9877515310586177e-05, "loss": 2.9704, "step": 39 }, { "epoch": 0.01730244126632242, "grad_norm": 10.134177207946777, "learning_rate": 1.9868766404199476e-05, "loss": 2.8793, "step": 40 }, { "epoch": 0.01773500229798048, "grad_norm": 8.96322250366211, "learning_rate": 1.9860017497812775e-05, "loss": 2.913, "step": 41 }, { "epoch": 0.01816756332963854, "grad_norm": 8.704127311706543, "learning_rate": 1.9851268591426075e-05, "loss": 2.8886, "step": 42 }, { "epoch": 0.018600124361296603, "grad_norm": 9.069153785705566, "learning_rate": 1.984251968503937e-05, "loss": 2.8725, "step": 43 }, { "epoch": 0.019032685392954662, "grad_norm": 9.031394004821777, "learning_rate": 1.983377077865267e-05, "loss": 2.8581, "step": 44 }, { "epoch": 0.019465246424612722, "grad_norm": 8.829975128173828, "learning_rate": 1.982502187226597e-05, "loss": 2.8044, "step": 45 }, { "epoch": 0.01989780745627078, "grad_norm": 8.565601348876953, "learning_rate": 1.9816272965879265e-05, "loss": 2.8094, "step": 46 }, { "epoch": 0.020330368487928845, "grad_norm": 8.82313060760498, "learning_rate": 1.9807524059492564e-05, "loss": 2.7984, "step": 47 }, { "epoch": 0.020762929519586904, "grad_norm": 9.0712251663208, "learning_rate": 1.9798775153105863e-05, "loss": 2.6778, "step": 48 }, { "epoch": 0.021195490551244964, "grad_norm": 9.187145233154297, "learning_rate": 1.9790026246719162e-05, "loss": 2.7568, "step": 49 }, { "epoch": 0.021628051582903027, "grad_norm": 9.249820709228516, "learning_rate": 1.978127734033246e-05, "loss": 2.7787, "step": 50 }, { "epoch": 0.022060612614561086, "grad_norm": 8.687273025512695, "learning_rate": 1.977252843394576e-05, "loss": 2.7557, "step": 51 }, { "epoch": 0.022493173646219146, "grad_norm": 9.046874046325684, "learning_rate": 1.9763779527559057e-05, "loss": 2.6655, "step": 52 }, { "epoch": 0.022925734677877205, "grad_norm": 9.147759437561035, "learning_rate": 1.9755030621172356e-05, "loss": 2.6384, "step": 53 }, { "epoch": 0.02335829570953527, "grad_norm": 8.494421005249023, "learning_rate": 1.9746281714785655e-05, "loss": 2.668, "step": 54 }, { "epoch": 0.023790856741193328, "grad_norm": 9.623828887939453, "learning_rate": 1.973753280839895e-05, "loss": 2.5583, "step": 55 }, { "epoch": 0.024223417772851388, "grad_norm": 8.837956428527832, "learning_rate": 1.972878390201225e-05, "loss": 2.657, "step": 56 }, { "epoch": 0.024655978804509447, "grad_norm": 9.294939041137695, "learning_rate": 1.972003499562555e-05, "loss": 2.5761, "step": 57 }, { "epoch": 0.02508853983616751, "grad_norm": 9.436785697937012, "learning_rate": 1.9711286089238845e-05, "loss": 2.5929, "step": 58 }, { "epoch": 0.02552110086782557, "grad_norm": 8.875762939453125, "learning_rate": 1.9702537182852148e-05, "loss": 2.6005, "step": 59 }, { "epoch": 0.02595366189948363, "grad_norm": 8.302167892456055, "learning_rate": 1.9693788276465443e-05, "loss": 2.5536, "step": 60 }, { "epoch": 0.026386222931141692, "grad_norm": 8.504595756530762, "learning_rate": 1.9685039370078743e-05, "loss": 2.479, "step": 61 }, { "epoch": 0.026818783962799752, "grad_norm": 9.210023880004883, "learning_rate": 1.9676290463692042e-05, "loss": 2.4938, "step": 62 }, { "epoch": 0.02725134499445781, "grad_norm": 8.98862075805664, "learning_rate": 1.9667541557305338e-05, "loss": 2.4786, "step": 63 }, { "epoch": 0.02768390602611587, "grad_norm": 8.775471687316895, "learning_rate": 1.9658792650918637e-05, "loss": 2.4588, "step": 64 }, { "epoch": 0.028116467057773934, "grad_norm": 8.455652236938477, "learning_rate": 1.9650043744531936e-05, "loss": 2.5419, "step": 65 }, { "epoch": 0.028549028089431994, "grad_norm": 8.13263988494873, "learning_rate": 1.9641294838145232e-05, "loss": 2.4738, "step": 66 }, { "epoch": 0.028981589121090053, "grad_norm": 8.96052360534668, "learning_rate": 1.963254593175853e-05, "loss": 2.3496, "step": 67 }, { "epoch": 0.029414150152748113, "grad_norm": 8.10035228729248, "learning_rate": 1.962379702537183e-05, "loss": 2.4487, "step": 68 }, { "epoch": 0.029846711184406176, "grad_norm": 8.069640159606934, "learning_rate": 1.961504811898513e-05, "loss": 2.4545, "step": 69 }, { "epoch": 0.030279272216064235, "grad_norm": 8.711186408996582, "learning_rate": 1.960629921259843e-05, "loss": 2.3936, "step": 70 }, { "epoch": 0.030711833247722295, "grad_norm": 9.187376022338867, "learning_rate": 1.9597550306211725e-05, "loss": 2.2613, "step": 71 }, { "epoch": 0.031144394279380358, "grad_norm": 8.515965461730957, "learning_rate": 1.9588801399825024e-05, "loss": 2.4022, "step": 72 }, { "epoch": 0.031576955311038414, "grad_norm": 8.901565551757812, "learning_rate": 1.9580052493438323e-05, "loss": 2.3848, "step": 73 }, { "epoch": 0.03200951634269648, "grad_norm": 8.479150772094727, "learning_rate": 1.957130358705162e-05, "loss": 2.2304, "step": 74 }, { "epoch": 0.03244207737435454, "grad_norm": 8.931798934936523, "learning_rate": 1.9562554680664918e-05, "loss": 2.3045, "step": 75 }, { "epoch": 0.032874638406012596, "grad_norm": 8.757261276245117, "learning_rate": 1.9553805774278217e-05, "loss": 2.3887, "step": 76 }, { "epoch": 0.03330719943767066, "grad_norm": 8.421159744262695, "learning_rate": 1.9545056867891513e-05, "loss": 2.3631, "step": 77 }, { "epoch": 0.03373976046932872, "grad_norm": 8.453868865966797, "learning_rate": 1.9536307961504816e-05, "loss": 2.1783, "step": 78 }, { "epoch": 0.03417232150098678, "grad_norm": 8.190146446228027, "learning_rate": 1.952755905511811e-05, "loss": 2.3526, "step": 79 }, { "epoch": 0.03460488253264484, "grad_norm": 9.044288635253906, "learning_rate": 1.951881014873141e-05, "loss": 2.2243, "step": 80 }, { "epoch": 0.0350374435643029, "grad_norm": 7.598982334136963, "learning_rate": 1.951006124234471e-05, "loss": 2.3016, "step": 81 }, { "epoch": 0.03547000459596096, "grad_norm": 8.639673233032227, "learning_rate": 1.9501312335958006e-05, "loss": 2.2283, "step": 82 }, { "epoch": 0.035902565627619024, "grad_norm": 8.017471313476562, "learning_rate": 1.9492563429571305e-05, "loss": 2.2479, "step": 83 }, { "epoch": 0.03633512665927708, "grad_norm": 8.051676750183105, "learning_rate": 1.9483814523184604e-05, "loss": 2.2333, "step": 84 }, { "epoch": 0.03676768769093514, "grad_norm": 9.102787971496582, "learning_rate": 1.94750656167979e-05, "loss": 2.1107, "step": 85 }, { "epoch": 0.037200248722593206, "grad_norm": 7.457028388977051, "learning_rate": 1.94663167104112e-05, "loss": 2.307, "step": 86 }, { "epoch": 0.03763280975425126, "grad_norm": 8.147334098815918, "learning_rate": 1.9457567804024498e-05, "loss": 2.1986, "step": 87 }, { "epoch": 0.038065370785909325, "grad_norm": 8.232810974121094, "learning_rate": 1.9448818897637797e-05, "loss": 2.1094, "step": 88 }, { "epoch": 0.03849793181756739, "grad_norm": 8.264384269714355, "learning_rate": 1.9440069991251097e-05, "loss": 2.2574, "step": 89 }, { "epoch": 0.038930492849225444, "grad_norm": 8.394896507263184, "learning_rate": 1.9431321084864392e-05, "loss": 2.2311, "step": 90 }, { "epoch": 0.03936305388088351, "grad_norm": 8.641265869140625, "learning_rate": 1.9422572178477692e-05, "loss": 2.1939, "step": 91 }, { "epoch": 0.03979561491254156, "grad_norm": 7.687323570251465, "learning_rate": 1.941382327209099e-05, "loss": 2.1942, "step": 92 }, { "epoch": 0.040228175944199626, "grad_norm": 7.773819923400879, "learning_rate": 1.9405074365704287e-05, "loss": 2.2004, "step": 93 }, { "epoch": 0.04066073697585769, "grad_norm": 8.127384185791016, "learning_rate": 1.9396325459317586e-05, "loss": 2.1736, "step": 94 }, { "epoch": 0.041093298007515745, "grad_norm": 9.069780349731445, "learning_rate": 1.9387576552930885e-05, "loss": 2.0747, "step": 95 }, { "epoch": 0.04152585903917381, "grad_norm": 7.772279739379883, "learning_rate": 1.9378827646544184e-05, "loss": 2.1285, "step": 96 }, { "epoch": 0.04195842007083187, "grad_norm": 8.430010795593262, "learning_rate": 1.937007874015748e-05, "loss": 2.2845, "step": 97 }, { "epoch": 0.04239098110248993, "grad_norm": 7.560873508453369, "learning_rate": 1.9361329833770783e-05, "loss": 2.1905, "step": 98 }, { "epoch": 0.04282354213414799, "grad_norm": 9.867399215698242, "learning_rate": 1.935258092738408e-05, "loss": 2.1375, "step": 99 }, { "epoch": 0.043256103165806054, "grad_norm": 9.414137840270996, "learning_rate": 1.9343832020997378e-05, "loss": 2.059, "step": 100 }, { "epoch": 0.04368866419746411, "grad_norm": 8.068355560302734, "learning_rate": 1.9335083114610677e-05, "loss": 2.1939, "step": 101 }, { "epoch": 0.04412122522912217, "grad_norm": 7.778262615203857, "learning_rate": 1.9326334208223973e-05, "loss": 2.2688, "step": 102 }, { "epoch": 0.04455378626078023, "grad_norm": 8.01554012298584, "learning_rate": 1.9317585301837272e-05, "loss": 2.1017, "step": 103 }, { "epoch": 0.04498634729243829, "grad_norm": 7.774407386779785, "learning_rate": 1.930883639545057e-05, "loss": 2.0725, "step": 104 }, { "epoch": 0.045418908324096355, "grad_norm": 8.284996032714844, "learning_rate": 1.9300087489063867e-05, "loss": 2.0856, "step": 105 }, { "epoch": 0.04585146935575441, "grad_norm": 10.201037406921387, "learning_rate": 1.9291338582677166e-05, "loss": 2.3246, "step": 106 }, { "epoch": 0.046284030387412474, "grad_norm": 7.818156719207764, "learning_rate": 1.9282589676290465e-05, "loss": 2.041, "step": 107 }, { "epoch": 0.04671659141907054, "grad_norm": 10.078817367553711, "learning_rate": 1.9273840769903765e-05, "loss": 2.0881, "step": 108 }, { "epoch": 0.04714915245072859, "grad_norm": 7.91151762008667, "learning_rate": 1.9265091863517064e-05, "loss": 2.0693, "step": 109 }, { "epoch": 0.047581713482386656, "grad_norm": 8.433507919311523, "learning_rate": 1.925634295713036e-05, "loss": 2.1478, "step": 110 }, { "epoch": 0.04801427451404472, "grad_norm": 8.348559379577637, "learning_rate": 1.924759405074366e-05, "loss": 2.0982, "step": 111 }, { "epoch": 0.048446835545702775, "grad_norm": 8.86949348449707, "learning_rate": 1.9238845144356958e-05, "loss": 2.0297, "step": 112 }, { "epoch": 0.04887939657736084, "grad_norm": 8.434800148010254, "learning_rate": 1.9230096237970254e-05, "loss": 2.1681, "step": 113 }, { "epoch": 0.049311957609018894, "grad_norm": 9.036213874816895, "learning_rate": 1.9221347331583553e-05, "loss": 2.047, "step": 114 }, { "epoch": 0.04974451864067696, "grad_norm": 9.136820793151855, "learning_rate": 1.9212598425196852e-05, "loss": 2.0121, "step": 115 }, { "epoch": 0.05017707967233502, "grad_norm": 10.170919418334961, "learning_rate": 1.9203849518810148e-05, "loss": 2.1075, "step": 116 }, { "epoch": 0.050609640703993077, "grad_norm": 8.642443656921387, "learning_rate": 1.919510061242345e-05, "loss": 2.1603, "step": 117 }, { "epoch": 0.05104220173565114, "grad_norm": 8.400376319885254, "learning_rate": 1.9186351706036747e-05, "loss": 2.0528, "step": 118 }, { "epoch": 0.0514747627673092, "grad_norm": 10.212697982788086, "learning_rate": 1.9177602799650046e-05, "loss": 2.0214, "step": 119 }, { "epoch": 0.05190732379896726, "grad_norm": 8.315373420715332, "learning_rate": 1.9168853893263345e-05, "loss": 2.0902, "step": 120 }, { "epoch": 0.05233988483062532, "grad_norm": 9.261432647705078, "learning_rate": 1.916010498687664e-05, "loss": 1.9828, "step": 121 }, { "epoch": 0.052772445862283385, "grad_norm": 9.281742095947266, "learning_rate": 1.915135608048994e-05, "loss": 2.0072, "step": 122 }, { "epoch": 0.05320500689394144, "grad_norm": 9.036360740661621, "learning_rate": 1.914260717410324e-05, "loss": 2.0366, "step": 123 }, { "epoch": 0.053637567925599504, "grad_norm": 9.62345027923584, "learning_rate": 1.9133858267716535e-05, "loss": 2.0936, "step": 124 }, { "epoch": 0.05407012895725756, "grad_norm": 8.963865280151367, "learning_rate": 1.9125109361329834e-05, "loss": 1.9454, "step": 125 }, { "epoch": 0.05450268998891562, "grad_norm": 9.449174880981445, "learning_rate": 1.9116360454943133e-05, "loss": 2.0623, "step": 126 }, { "epoch": 0.054935251020573686, "grad_norm": 9.598356246948242, "learning_rate": 1.9107611548556433e-05, "loss": 2.0225, "step": 127 }, { "epoch": 0.05536781205223174, "grad_norm": 8.742138862609863, "learning_rate": 1.9098862642169732e-05, "loss": 2.0985, "step": 128 }, { "epoch": 0.055800373083889805, "grad_norm": 9.00941276550293, "learning_rate": 1.9090113735783028e-05, "loss": 1.9905, "step": 129 }, { "epoch": 0.05623293411554787, "grad_norm": 10.214828491210938, "learning_rate": 1.9081364829396327e-05, "loss": 2.0573, "step": 130 }, { "epoch": 0.056665495147205924, "grad_norm": 8.646235466003418, "learning_rate": 1.9072615923009626e-05, "loss": 1.9669, "step": 131 }, { "epoch": 0.05709805617886399, "grad_norm": 9.02608585357666, "learning_rate": 1.9063867016622922e-05, "loss": 1.9952, "step": 132 }, { "epoch": 0.05753061721052205, "grad_norm": 9.741292953491211, "learning_rate": 1.905511811023622e-05, "loss": 2.0118, "step": 133 }, { "epoch": 0.057963178242180106, "grad_norm": 9.34339427947998, "learning_rate": 1.904636920384952e-05, "loss": 2.0352, "step": 134 }, { "epoch": 0.05839573927383817, "grad_norm": 9.542576789855957, "learning_rate": 1.9037620297462816e-05, "loss": 2.0067, "step": 135 }, { "epoch": 0.058828300305496226, "grad_norm": 9.324414253234863, "learning_rate": 1.902887139107612e-05, "loss": 2.0344, "step": 136 }, { "epoch": 0.05926086133715429, "grad_norm": 10.529446601867676, "learning_rate": 1.9020122484689415e-05, "loss": 1.905, "step": 137 }, { "epoch": 0.05969342236881235, "grad_norm": 9.371881484985352, "learning_rate": 1.9011373578302714e-05, "loss": 1.9717, "step": 138 }, { "epoch": 0.06012598340047041, "grad_norm": 10.534412384033203, "learning_rate": 1.9002624671916013e-05, "loss": 2.1448, "step": 139 }, { "epoch": 0.06055854443212847, "grad_norm": 9.028956413269043, "learning_rate": 1.899387576552931e-05, "loss": 2.0132, "step": 140 }, { "epoch": 0.060991105463786534, "grad_norm": 9.384864807128906, "learning_rate": 1.8985126859142608e-05, "loss": 1.9173, "step": 141 }, { "epoch": 0.06142366649544459, "grad_norm": 9.299654960632324, "learning_rate": 1.8976377952755907e-05, "loss": 1.9669, "step": 142 }, { "epoch": 0.06185622752710265, "grad_norm": 10.067913055419922, "learning_rate": 1.8967629046369206e-05, "loss": 1.9765, "step": 143 }, { "epoch": 0.062288788558760716, "grad_norm": 10.266395568847656, "learning_rate": 1.8958880139982502e-05, "loss": 1.9275, "step": 144 }, { "epoch": 0.06272134959041878, "grad_norm": 9.555045127868652, "learning_rate": 1.89501312335958e-05, "loss": 1.9977, "step": 145 }, { "epoch": 0.06315391062207683, "grad_norm": 9.367684364318848, "learning_rate": 1.89413823272091e-05, "loss": 1.9813, "step": 146 }, { "epoch": 0.06358647165373489, "grad_norm": 9.195287704467773, "learning_rate": 1.89326334208224e-05, "loss": 1.982, "step": 147 }, { "epoch": 0.06401903268539295, "grad_norm": 11.219182014465332, "learning_rate": 1.89238845144357e-05, "loss": 2.0398, "step": 148 }, { "epoch": 0.06445159371705102, "grad_norm": 10.429877281188965, "learning_rate": 1.8915135608048995e-05, "loss": 1.9312, "step": 149 }, { "epoch": 0.06488415474870908, "grad_norm": 10.788617134094238, "learning_rate": 1.8906386701662294e-05, "loss": 1.9711, "step": 150 }, { "epoch": 0.06531671578036713, "grad_norm": 11.090737342834473, "learning_rate": 1.8897637795275593e-05, "loss": 2.0233, "step": 151 }, { "epoch": 0.06574927681202519, "grad_norm": 10.588007926940918, "learning_rate": 1.888888888888889e-05, "loss": 2.0727, "step": 152 }, { "epoch": 0.06618183784368326, "grad_norm": 10.38557243347168, "learning_rate": 1.888013998250219e-05, "loss": 1.9655, "step": 153 }, { "epoch": 0.06661439887534132, "grad_norm": 9.603656768798828, "learning_rate": 1.8871391076115488e-05, "loss": 1.9975, "step": 154 }, { "epoch": 0.06704695990699938, "grad_norm": 9.86426830291748, "learning_rate": 1.8862642169728787e-05, "loss": 1.9229, "step": 155 }, { "epoch": 0.06747952093865744, "grad_norm": 9.31710147857666, "learning_rate": 1.8853893263342086e-05, "loss": 1.9682, "step": 156 }, { "epoch": 0.0679120819703155, "grad_norm": 10.599958419799805, "learning_rate": 1.8845144356955382e-05, "loss": 1.9162, "step": 157 }, { "epoch": 0.06834464300197356, "grad_norm": 9.712906837463379, "learning_rate": 1.883639545056868e-05, "loss": 1.9684, "step": 158 }, { "epoch": 0.06877720403363162, "grad_norm": 10.723039627075195, "learning_rate": 1.882764654418198e-05, "loss": 1.9253, "step": 159 }, { "epoch": 0.06920976506528968, "grad_norm": 9.580291748046875, "learning_rate": 1.8818897637795276e-05, "loss": 2.0012, "step": 160 }, { "epoch": 0.06964232609694775, "grad_norm": 10.511956214904785, "learning_rate": 1.8810148731408575e-05, "loss": 1.9845, "step": 161 }, { "epoch": 0.0700748871286058, "grad_norm": 11.413307189941406, "learning_rate": 1.8801399825021874e-05, "loss": 1.9733, "step": 162 }, { "epoch": 0.07050744816026386, "grad_norm": 9.986128807067871, "learning_rate": 1.879265091863517e-05, "loss": 1.8991, "step": 163 }, { "epoch": 0.07094000919192192, "grad_norm": 10.067076683044434, "learning_rate": 1.878390201224847e-05, "loss": 1.9415, "step": 164 }, { "epoch": 0.07137257022357998, "grad_norm": 10.817390441894531, "learning_rate": 1.877515310586177e-05, "loss": 1.965, "step": 165 }, { "epoch": 0.07180513125523805, "grad_norm": 10.907906532287598, "learning_rate": 1.8766404199475068e-05, "loss": 1.8497, "step": 166 }, { "epoch": 0.07223769228689611, "grad_norm": 12.694193840026855, "learning_rate": 1.8757655293088367e-05, "loss": 1.8588, "step": 167 }, { "epoch": 0.07267025331855416, "grad_norm": 11.461287498474121, "learning_rate": 1.8748906386701663e-05, "loss": 1.9708, "step": 168 }, { "epoch": 0.07310281435021222, "grad_norm": 12.703743934631348, "learning_rate": 1.8740157480314962e-05, "loss": 1.9838, "step": 169 }, { "epoch": 0.07353537538187029, "grad_norm": 9.77420425415039, "learning_rate": 1.873140857392826e-05, "loss": 1.9551, "step": 170 }, { "epoch": 0.07396793641352835, "grad_norm": 11.191120147705078, "learning_rate": 1.8722659667541557e-05, "loss": 2.001, "step": 171 }, { "epoch": 0.07440049744518641, "grad_norm": 9.07374095916748, "learning_rate": 1.8713910761154856e-05, "loss": 1.874, "step": 172 }, { "epoch": 0.07483305847684446, "grad_norm": 10.750893592834473, "learning_rate": 1.8705161854768156e-05, "loss": 1.8673, "step": 173 }, { "epoch": 0.07526561950850252, "grad_norm": 10.637674331665039, "learning_rate": 1.8696412948381455e-05, "loss": 1.9057, "step": 174 }, { "epoch": 0.07569818054016059, "grad_norm": 11.109024047851562, "learning_rate": 1.8687664041994754e-05, "loss": 1.9262, "step": 175 }, { "epoch": 0.07613074157181865, "grad_norm": 10.31051254272461, "learning_rate": 1.867891513560805e-05, "loss": 1.9316, "step": 176 }, { "epoch": 0.07656330260347671, "grad_norm": 10.382652282714844, "learning_rate": 1.867016622922135e-05, "loss": 1.8514, "step": 177 }, { "epoch": 0.07699586363513478, "grad_norm": 11.28693962097168, "learning_rate": 1.8661417322834648e-05, "loss": 1.9258, "step": 178 }, { "epoch": 0.07742842466679282, "grad_norm": 9.876675605773926, "learning_rate": 1.8652668416447944e-05, "loss": 1.935, "step": 179 }, { "epoch": 0.07786098569845089, "grad_norm": 10.760346412658691, "learning_rate": 1.8643919510061243e-05, "loss": 1.8479, "step": 180 }, { "epoch": 0.07829354673010895, "grad_norm": 10.346819877624512, "learning_rate": 1.8635170603674542e-05, "loss": 1.8821, "step": 181 }, { "epoch": 0.07872610776176701, "grad_norm": 10.750500679016113, "learning_rate": 1.8626421697287838e-05, "loss": 1.8996, "step": 182 }, { "epoch": 0.07915866879342508, "grad_norm": 11.348615646362305, "learning_rate": 1.8617672790901137e-05, "loss": 1.8542, "step": 183 }, { "epoch": 0.07959122982508313, "grad_norm": 10.184338569641113, "learning_rate": 1.8608923884514437e-05, "loss": 1.9138, "step": 184 }, { "epoch": 0.08002379085674119, "grad_norm": 12.307705879211426, "learning_rate": 1.8600174978127736e-05, "loss": 1.8653, "step": 185 }, { "epoch": 0.08045635188839925, "grad_norm": 9.707239151000977, "learning_rate": 1.8591426071741035e-05, "loss": 1.9195, "step": 186 }, { "epoch": 0.08088891292005732, "grad_norm": 10.568989753723145, "learning_rate": 1.858267716535433e-05, "loss": 1.8921, "step": 187 }, { "epoch": 0.08132147395171538, "grad_norm": 10.401839256286621, "learning_rate": 1.857392825896763e-05, "loss": 1.9463, "step": 188 }, { "epoch": 0.08175403498337344, "grad_norm": 11.29499626159668, "learning_rate": 1.856517935258093e-05, "loss": 1.8881, "step": 189 }, { "epoch": 0.08218659601503149, "grad_norm": 12.853240013122559, "learning_rate": 1.855643044619423e-05, "loss": 1.8063, "step": 190 }, { "epoch": 0.08261915704668955, "grad_norm": 12.939081192016602, "learning_rate": 1.8547681539807524e-05, "loss": 1.8993, "step": 191 }, { "epoch": 0.08305171807834762, "grad_norm": 11.397017478942871, "learning_rate": 1.8538932633420824e-05, "loss": 1.9577, "step": 192 }, { "epoch": 0.08348427911000568, "grad_norm": 10.588729858398438, "learning_rate": 1.8530183727034123e-05, "loss": 1.9725, "step": 193 }, { "epoch": 0.08391684014166374, "grad_norm": 10.367941856384277, "learning_rate": 1.8521434820647422e-05, "loss": 1.9014, "step": 194 }, { "epoch": 0.08434940117332179, "grad_norm": 10.358473777770996, "learning_rate": 1.851268591426072e-05, "loss": 1.9505, "step": 195 }, { "epoch": 0.08478196220497985, "grad_norm": 10.932351112365723, "learning_rate": 1.8503937007874017e-05, "loss": 1.9628, "step": 196 }, { "epoch": 0.08521452323663792, "grad_norm": 10.701963424682617, "learning_rate": 1.8495188101487316e-05, "loss": 1.8751, "step": 197 }, { "epoch": 0.08564708426829598, "grad_norm": 12.282137870788574, "learning_rate": 1.8486439195100615e-05, "loss": 1.7744, "step": 198 }, { "epoch": 0.08607964529995404, "grad_norm": 11.679487228393555, "learning_rate": 1.847769028871391e-05, "loss": 1.8378, "step": 199 }, { "epoch": 0.08651220633161211, "grad_norm": 15.917386054992676, "learning_rate": 1.846894138232721e-05, "loss": 1.8038, "step": 200 }, { "epoch": 0.08694476736327016, "grad_norm": 13.143421173095703, "learning_rate": 1.846019247594051e-05, "loss": 1.9285, "step": 201 }, { "epoch": 0.08737732839492822, "grad_norm": 12.354574203491211, "learning_rate": 1.8451443569553805e-05, "loss": 1.7922, "step": 202 }, { "epoch": 0.08780988942658628, "grad_norm": 13.790152549743652, "learning_rate": 1.8442694663167108e-05, "loss": 1.9562, "step": 203 }, { "epoch": 0.08824245045824435, "grad_norm": 11.429886817932129, "learning_rate": 1.8433945756780404e-05, "loss": 1.7733, "step": 204 }, { "epoch": 0.08867501148990241, "grad_norm": 11.52943229675293, "learning_rate": 1.8425196850393703e-05, "loss": 1.8627, "step": 205 }, { "epoch": 0.08910757252156046, "grad_norm": 11.378132820129395, "learning_rate": 1.8416447944007002e-05, "loss": 1.7967, "step": 206 }, { "epoch": 0.08954013355321852, "grad_norm": 10.589323997497559, "learning_rate": 1.8407699037620298e-05, "loss": 1.9194, "step": 207 }, { "epoch": 0.08997269458487658, "grad_norm": 10.294939994812012, "learning_rate": 1.8398950131233597e-05, "loss": 1.8982, "step": 208 }, { "epoch": 0.09040525561653465, "grad_norm": 10.35794448852539, "learning_rate": 1.8390201224846896e-05, "loss": 1.9172, "step": 209 }, { "epoch": 0.09083781664819271, "grad_norm": 11.790461540222168, "learning_rate": 1.8381452318460192e-05, "loss": 1.8459, "step": 210 }, { "epoch": 0.09127037767985077, "grad_norm": 12.333897590637207, "learning_rate": 1.837270341207349e-05, "loss": 1.7778, "step": 211 }, { "epoch": 0.09170293871150882, "grad_norm": 12.425847053527832, "learning_rate": 1.836395450568679e-05, "loss": 1.8701, "step": 212 }, { "epoch": 0.09213549974316688, "grad_norm": 11.710013389587402, "learning_rate": 1.835520559930009e-05, "loss": 1.7047, "step": 213 }, { "epoch": 0.09256806077482495, "grad_norm": 11.748705863952637, "learning_rate": 1.834645669291339e-05, "loss": 1.8291, "step": 214 }, { "epoch": 0.09300062180648301, "grad_norm": 11.944961547851562, "learning_rate": 1.8337707786526685e-05, "loss": 1.8243, "step": 215 }, { "epoch": 0.09343318283814107, "grad_norm": 11.755900382995605, "learning_rate": 1.8328958880139984e-05, "loss": 1.883, "step": 216 }, { "epoch": 0.09386574386979912, "grad_norm": 12.01804256439209, "learning_rate": 1.8320209973753283e-05, "loss": 1.8377, "step": 217 }, { "epoch": 0.09429830490145719, "grad_norm": 11.53456974029541, "learning_rate": 1.831146106736658e-05, "loss": 1.7949, "step": 218 }, { "epoch": 0.09473086593311525, "grad_norm": 10.804949760437012, "learning_rate": 1.830271216097988e-05, "loss": 1.8741, "step": 219 }, { "epoch": 0.09516342696477331, "grad_norm": 11.649739265441895, "learning_rate": 1.8293963254593178e-05, "loss": 1.8424, "step": 220 }, { "epoch": 0.09559598799643138, "grad_norm": 11.372157096862793, "learning_rate": 1.8285214348206473e-05, "loss": 1.7853, "step": 221 }, { "epoch": 0.09602854902808944, "grad_norm": 10.01382064819336, "learning_rate": 1.8276465441819776e-05, "loss": 1.8669, "step": 222 }, { "epoch": 0.09646111005974749, "grad_norm": 11.315641403198242, "learning_rate": 1.8267716535433072e-05, "loss": 1.7636, "step": 223 }, { "epoch": 0.09689367109140555, "grad_norm": 12.253576278686523, "learning_rate": 1.825896762904637e-05, "loss": 1.8279, "step": 224 }, { "epoch": 0.09732623212306361, "grad_norm": 11.940933227539062, "learning_rate": 1.825021872265967e-05, "loss": 1.9796, "step": 225 }, { "epoch": 0.09775879315472168, "grad_norm": 11.986372947692871, "learning_rate": 1.8241469816272966e-05, "loss": 1.8362, "step": 226 }, { "epoch": 0.09819135418637974, "grad_norm": 12.15494155883789, "learning_rate": 1.8232720909886265e-05, "loss": 1.8801, "step": 227 }, { "epoch": 0.09862391521803779, "grad_norm": 11.447915077209473, "learning_rate": 1.8223972003499564e-05, "loss": 1.8247, "step": 228 }, { "epoch": 0.09905647624969585, "grad_norm": 11.581623077392578, "learning_rate": 1.821522309711286e-05, "loss": 1.8369, "step": 229 }, { "epoch": 0.09948903728135391, "grad_norm": 11.14620590209961, "learning_rate": 1.820647419072616e-05, "loss": 1.8653, "step": 230 }, { "epoch": 0.09992159831301198, "grad_norm": 12.83015251159668, "learning_rate": 1.819772528433946e-05, "loss": 1.8845, "step": 231 }, { "epoch": 0.10035415934467004, "grad_norm": 11.10083293914795, "learning_rate": 1.8188976377952758e-05, "loss": 1.837, "step": 232 }, { "epoch": 0.1007867203763281, "grad_norm": 12.611995697021484, "learning_rate": 1.8180227471566057e-05, "loss": 1.8799, "step": 233 }, { "epoch": 0.10121928140798615, "grad_norm": 14.688155174255371, "learning_rate": 1.8171478565179353e-05, "loss": 1.9175, "step": 234 }, { "epoch": 0.10165184243964422, "grad_norm": 11.024300575256348, "learning_rate": 1.8162729658792652e-05, "loss": 1.7136, "step": 235 }, { "epoch": 0.10208440347130228, "grad_norm": 14.889878273010254, "learning_rate": 1.815398075240595e-05, "loss": 1.7828, "step": 236 }, { "epoch": 0.10251696450296034, "grad_norm": 11.804996490478516, "learning_rate": 1.814523184601925e-05, "loss": 1.8391, "step": 237 }, { "epoch": 0.1029495255346184, "grad_norm": 11.816452026367188, "learning_rate": 1.8136482939632546e-05, "loss": 1.8498, "step": 238 }, { "epoch": 0.10338208656627645, "grad_norm": 13.521631240844727, "learning_rate": 1.8127734033245846e-05, "loss": 1.7968, "step": 239 }, { "epoch": 0.10381464759793452, "grad_norm": 11.820068359375, "learning_rate": 1.8118985126859145e-05, "loss": 1.8115, "step": 240 }, { "epoch": 0.10424720862959258, "grad_norm": 14.849625587463379, "learning_rate": 1.811023622047244e-05, "loss": 1.7471, "step": 241 }, { "epoch": 0.10467976966125064, "grad_norm": 11.63190746307373, "learning_rate": 1.8101487314085743e-05, "loss": 1.7383, "step": 242 }, { "epoch": 0.1051123306929087, "grad_norm": 11.374736785888672, "learning_rate": 1.809273840769904e-05, "loss": 1.8412, "step": 243 }, { "epoch": 0.10554489172456677, "grad_norm": 14.540604591369629, "learning_rate": 1.8083989501312338e-05, "loss": 1.7489, "step": 244 }, { "epoch": 0.10597745275622482, "grad_norm": 12.734722137451172, "learning_rate": 1.8075240594925637e-05, "loss": 1.8997, "step": 245 }, { "epoch": 0.10641001378788288, "grad_norm": 13.632735252380371, "learning_rate": 1.8066491688538933e-05, "loss": 1.8283, "step": 246 }, { "epoch": 0.10684257481954094, "grad_norm": 13.188791275024414, "learning_rate": 1.8057742782152232e-05, "loss": 1.7936, "step": 247 }, { "epoch": 0.10727513585119901, "grad_norm": 12.36187744140625, "learning_rate": 1.804899387576553e-05, "loss": 1.7985, "step": 248 }, { "epoch": 0.10770769688285707, "grad_norm": 13.20405101776123, "learning_rate": 1.8040244969378827e-05, "loss": 1.7463, "step": 249 }, { "epoch": 0.10814025791451512, "grad_norm": 12.463266372680664, "learning_rate": 1.8031496062992127e-05, "loss": 1.7374, "step": 250 }, { "epoch": 0.10857281894617318, "grad_norm": 13.607605934143066, "learning_rate": 1.8022747156605426e-05, "loss": 1.7598, "step": 251 }, { "epoch": 0.10900537997783125, "grad_norm": 13.48353099822998, "learning_rate": 1.8013998250218725e-05, "loss": 1.7661, "step": 252 }, { "epoch": 0.10943794100948931, "grad_norm": 13.995173454284668, "learning_rate": 1.8005249343832024e-05, "loss": 1.7972, "step": 253 }, { "epoch": 0.10987050204114737, "grad_norm": 14.007890701293945, "learning_rate": 1.799650043744532e-05, "loss": 1.7641, "step": 254 }, { "epoch": 0.11030306307280544, "grad_norm": 16.224395751953125, "learning_rate": 1.798775153105862e-05, "loss": 1.8158, "step": 255 }, { "epoch": 0.11073562410446348, "grad_norm": 15.336459159851074, "learning_rate": 1.797900262467192e-05, "loss": 1.7918, "step": 256 }, { "epoch": 0.11116818513612155, "grad_norm": 14.77893352508545, "learning_rate": 1.7970253718285214e-05, "loss": 1.8369, "step": 257 }, { "epoch": 0.11160074616777961, "grad_norm": 12.808744430541992, "learning_rate": 1.7961504811898514e-05, "loss": 1.7846, "step": 258 }, { "epoch": 0.11203330719943767, "grad_norm": 11.46886920928955, "learning_rate": 1.7952755905511813e-05, "loss": 1.8555, "step": 259 }, { "epoch": 0.11246586823109574, "grad_norm": 13.576665878295898, "learning_rate": 1.794400699912511e-05, "loss": 1.8384, "step": 260 }, { "epoch": 0.11289842926275379, "grad_norm": 13.28268814086914, "learning_rate": 1.793525809273841e-05, "loss": 1.809, "step": 261 }, { "epoch": 0.11333099029441185, "grad_norm": 11.743298530578613, "learning_rate": 1.7926509186351707e-05, "loss": 1.7515, "step": 262 }, { "epoch": 0.11376355132606991, "grad_norm": 12.474933624267578, "learning_rate": 1.7917760279965006e-05, "loss": 1.7624, "step": 263 }, { "epoch": 0.11419611235772797, "grad_norm": 13.138879776000977, "learning_rate": 1.7909011373578305e-05, "loss": 1.8352, "step": 264 }, { "epoch": 0.11462867338938604, "grad_norm": 11.728463172912598, "learning_rate": 1.79002624671916e-05, "loss": 1.8015, "step": 265 }, { "epoch": 0.1150612344210441, "grad_norm": 11.462135314941406, "learning_rate": 1.78915135608049e-05, "loss": 1.7868, "step": 266 }, { "epoch": 0.11549379545270215, "grad_norm": 12.589030265808105, "learning_rate": 1.78827646544182e-05, "loss": 1.6825, "step": 267 }, { "epoch": 0.11592635648436021, "grad_norm": 14.064338684082031, "learning_rate": 1.7874015748031495e-05, "loss": 1.7202, "step": 268 }, { "epoch": 0.11635891751601828, "grad_norm": 12.65044116973877, "learning_rate": 1.7865266841644795e-05, "loss": 1.7865, "step": 269 }, { "epoch": 0.11679147854767634, "grad_norm": 13.943512916564941, "learning_rate": 1.7856517935258094e-05, "loss": 1.7506, "step": 270 }, { "epoch": 0.1172240395793344, "grad_norm": 13.109914779663086, "learning_rate": 1.7847769028871393e-05, "loss": 1.7659, "step": 271 }, { "epoch": 0.11765660061099245, "grad_norm": 12.937542915344238, "learning_rate": 1.7839020122484692e-05, "loss": 1.8309, "step": 272 }, { "epoch": 0.11808916164265051, "grad_norm": 12.564200401306152, "learning_rate": 1.7830271216097988e-05, "loss": 1.8121, "step": 273 }, { "epoch": 0.11852172267430858, "grad_norm": 12.3132905960083, "learning_rate": 1.7821522309711287e-05, "loss": 1.8254, "step": 274 }, { "epoch": 0.11895428370596664, "grad_norm": 10.902737617492676, "learning_rate": 1.7812773403324587e-05, "loss": 1.7647, "step": 275 }, { "epoch": 0.1193868447376247, "grad_norm": 11.777158737182617, "learning_rate": 1.7804024496937882e-05, "loss": 1.776, "step": 276 }, { "epoch": 0.11981940576928277, "grad_norm": 13.244769096374512, "learning_rate": 1.779527559055118e-05, "loss": 1.7909, "step": 277 }, { "epoch": 0.12025196680094082, "grad_norm": 13.334715843200684, "learning_rate": 1.778652668416448e-05, "loss": 1.7326, "step": 278 }, { "epoch": 0.12068452783259888, "grad_norm": 11.51339340209961, "learning_rate": 1.7777777777777777e-05, "loss": 1.7133, "step": 279 }, { "epoch": 0.12111708886425694, "grad_norm": 13.407567024230957, "learning_rate": 1.776902887139108e-05, "loss": 1.8828, "step": 280 }, { "epoch": 0.121549649895915, "grad_norm": 10.572535514831543, "learning_rate": 1.7760279965004375e-05, "loss": 1.8534, "step": 281 }, { "epoch": 0.12198221092757307, "grad_norm": 12.543910026550293, "learning_rate": 1.7751531058617674e-05, "loss": 1.7876, "step": 282 }, { "epoch": 0.12241477195923112, "grad_norm": 12.414093017578125, "learning_rate": 1.7742782152230973e-05, "loss": 1.7249, "step": 283 }, { "epoch": 0.12284733299088918, "grad_norm": 12.800058364868164, "learning_rate": 1.7734033245844273e-05, "loss": 1.8623, "step": 284 }, { "epoch": 0.12327989402254724, "grad_norm": 14.119792938232422, "learning_rate": 1.772528433945757e-05, "loss": 1.8873, "step": 285 }, { "epoch": 0.1237124550542053, "grad_norm": 15.86700439453125, "learning_rate": 1.7716535433070868e-05, "loss": 1.6621, "step": 286 }, { "epoch": 0.12414501608586337, "grad_norm": 12.095022201538086, "learning_rate": 1.7707786526684167e-05, "loss": 1.8166, "step": 287 }, { "epoch": 0.12457757711752143, "grad_norm": 14.493571281433105, "learning_rate": 1.7699037620297463e-05, "loss": 1.9194, "step": 288 }, { "epoch": 0.1250101381491795, "grad_norm": 12.202247619628906, "learning_rate": 1.7690288713910762e-05, "loss": 1.7355, "step": 289 }, { "epoch": 0.12544269918083756, "grad_norm": 13.065779685974121, "learning_rate": 1.768153980752406e-05, "loss": 1.7089, "step": 290 }, { "epoch": 0.1258752602124956, "grad_norm": 11.993766784667969, "learning_rate": 1.767279090113736e-05, "loss": 1.7966, "step": 291 }, { "epoch": 0.12630782124415366, "grad_norm": 12.460329055786133, "learning_rate": 1.766404199475066e-05, "loss": 1.8666, "step": 292 }, { "epoch": 0.12674038227581172, "grad_norm": 12.044783592224121, "learning_rate": 1.7655293088363955e-05, "loss": 1.7071, "step": 293 }, { "epoch": 0.12717294330746978, "grad_norm": 14.959409713745117, "learning_rate": 1.7646544181977255e-05, "loss": 1.8463, "step": 294 }, { "epoch": 0.12760550433912785, "grad_norm": 15.096085548400879, "learning_rate": 1.7637795275590554e-05, "loss": 1.8419, "step": 295 }, { "epoch": 0.1280380653707859, "grad_norm": 12.444029808044434, "learning_rate": 1.762904636920385e-05, "loss": 1.7926, "step": 296 }, { "epoch": 0.12847062640244397, "grad_norm": 11.794731140136719, "learning_rate": 1.762029746281715e-05, "loss": 1.7099, "step": 297 }, { "epoch": 0.12890318743410203, "grad_norm": 12.06802749633789, "learning_rate": 1.7611548556430448e-05, "loss": 1.7576, "step": 298 }, { "epoch": 0.1293357484657601, "grad_norm": 13.183172225952148, "learning_rate": 1.7602799650043747e-05, "loss": 1.7683, "step": 299 }, { "epoch": 0.12976830949741816, "grad_norm": 12.662933349609375, "learning_rate": 1.7594050743657046e-05, "loss": 1.7571, "step": 300 }, { "epoch": 0.13020087052907622, "grad_norm": 13.42810344696045, "learning_rate": 1.7585301837270342e-05, "loss": 1.8103, "step": 301 }, { "epoch": 0.13063343156073426, "grad_norm": 12.349446296691895, "learning_rate": 1.757655293088364e-05, "loss": 1.8039, "step": 302 }, { "epoch": 0.13106599259239232, "grad_norm": 12.955143928527832, "learning_rate": 1.756780402449694e-05, "loss": 1.7638, "step": 303 }, { "epoch": 0.13149855362405038, "grad_norm": 12.840434074401855, "learning_rate": 1.7559055118110236e-05, "loss": 1.6824, "step": 304 }, { "epoch": 0.13193111465570845, "grad_norm": 12.311904907226562, "learning_rate": 1.7550306211723536e-05, "loss": 1.7549, "step": 305 }, { "epoch": 0.1323636756873665, "grad_norm": 14.21117877960205, "learning_rate": 1.7541557305336835e-05, "loss": 1.8167, "step": 306 }, { "epoch": 0.13279623671902457, "grad_norm": 15.326268196105957, "learning_rate": 1.753280839895013e-05, "loss": 1.7197, "step": 307 }, { "epoch": 0.13322879775068264, "grad_norm": 12.805957794189453, "learning_rate": 1.752405949256343e-05, "loss": 1.7673, "step": 308 }, { "epoch": 0.1336613587823407, "grad_norm": 14.023848533630371, "learning_rate": 1.751531058617673e-05, "loss": 1.684, "step": 309 }, { "epoch": 0.13409391981399876, "grad_norm": 14.051582336425781, "learning_rate": 1.7506561679790028e-05, "loss": 1.7981, "step": 310 }, { "epoch": 0.13452648084565683, "grad_norm": 14.332208633422852, "learning_rate": 1.7497812773403328e-05, "loss": 1.7263, "step": 311 }, { "epoch": 0.1349590418773149, "grad_norm": 13.481989860534668, "learning_rate": 1.7489063867016623e-05, "loss": 1.844, "step": 312 }, { "epoch": 0.13539160290897292, "grad_norm": 13.109614372253418, "learning_rate": 1.7480314960629923e-05, "loss": 1.7525, "step": 313 }, { "epoch": 0.135824163940631, "grad_norm": 12.776349067687988, "learning_rate": 1.7471566054243222e-05, "loss": 1.8072, "step": 314 }, { "epoch": 0.13625672497228905, "grad_norm": 12.574403762817383, "learning_rate": 1.7462817147856518e-05, "loss": 1.7611, "step": 315 }, { "epoch": 0.1366892860039471, "grad_norm": 13.381563186645508, "learning_rate": 1.7454068241469817e-05, "loss": 1.6969, "step": 316 }, { "epoch": 0.13712184703560518, "grad_norm": 12.077308654785156, "learning_rate": 1.7445319335083116e-05, "loss": 1.8818, "step": 317 }, { "epoch": 0.13755440806726324, "grad_norm": 12.719555854797363, "learning_rate": 1.7436570428696415e-05, "loss": 1.7705, "step": 318 }, { "epoch": 0.1379869690989213, "grad_norm": 14.66213607788086, "learning_rate": 1.7427821522309714e-05, "loss": 1.6998, "step": 319 }, { "epoch": 0.13841953013057937, "grad_norm": 13.779396057128906, "learning_rate": 1.741907261592301e-05, "loss": 1.7427, "step": 320 }, { "epoch": 0.13885209116223743, "grad_norm": 14.436192512512207, "learning_rate": 1.741032370953631e-05, "loss": 1.746, "step": 321 }, { "epoch": 0.1392846521938955, "grad_norm": 13.372818946838379, "learning_rate": 1.740157480314961e-05, "loss": 1.7303, "step": 322 }, { "epoch": 0.13971721322555355, "grad_norm": 12.75151538848877, "learning_rate": 1.7392825896762904e-05, "loss": 1.7359, "step": 323 }, { "epoch": 0.1401497742572116, "grad_norm": 14.832432746887207, "learning_rate": 1.7384076990376204e-05, "loss": 1.7595, "step": 324 }, { "epoch": 0.14058233528886965, "grad_norm": 12.097084045410156, "learning_rate": 1.7375328083989503e-05, "loss": 1.8151, "step": 325 }, { "epoch": 0.14101489632052772, "grad_norm": 15.27542781829834, "learning_rate": 1.73665791776028e-05, "loss": 1.7799, "step": 326 }, { "epoch": 0.14144745735218578, "grad_norm": 13.651289939880371, "learning_rate": 1.7357830271216098e-05, "loss": 1.788, "step": 327 }, { "epoch": 0.14188001838384384, "grad_norm": 12.324203491210938, "learning_rate": 1.7349081364829397e-05, "loss": 1.7426, "step": 328 }, { "epoch": 0.1423125794155019, "grad_norm": 13.443849563598633, "learning_rate": 1.7340332458442696e-05, "loss": 1.7648, "step": 329 }, { "epoch": 0.14274514044715997, "grad_norm": 15.6445894241333, "learning_rate": 1.7331583552055995e-05, "loss": 1.819, "step": 330 }, { "epoch": 0.14317770147881803, "grad_norm": 14.16102409362793, "learning_rate": 1.7322834645669295e-05, "loss": 1.7316, "step": 331 }, { "epoch": 0.1436102625104761, "grad_norm": 14.051115989685059, "learning_rate": 1.731408573928259e-05, "loss": 1.7395, "step": 332 }, { "epoch": 0.14404282354213416, "grad_norm": 13.74497127532959, "learning_rate": 1.730533683289589e-05, "loss": 1.7931, "step": 333 }, { "epoch": 0.14447538457379222, "grad_norm": 14.392535209655762, "learning_rate": 1.729658792650919e-05, "loss": 1.8631, "step": 334 }, { "epoch": 0.14490794560545026, "grad_norm": 12.427038192749023, "learning_rate": 1.7287839020122485e-05, "loss": 1.7007, "step": 335 }, { "epoch": 0.14534050663710832, "grad_norm": 13.627511024475098, "learning_rate": 1.7279090113735784e-05, "loss": 1.7324, "step": 336 }, { "epoch": 0.14577306766876638, "grad_norm": 13.441803932189941, "learning_rate": 1.7270341207349083e-05, "loss": 1.7494, "step": 337 }, { "epoch": 0.14620562870042444, "grad_norm": 15.683148384094238, "learning_rate": 1.7261592300962382e-05, "loss": 1.6773, "step": 338 }, { "epoch": 0.1466381897320825, "grad_norm": 14.39584732055664, "learning_rate": 1.725284339457568e-05, "loss": 1.6515, "step": 339 }, { "epoch": 0.14707075076374057, "grad_norm": 13.478503227233887, "learning_rate": 1.7244094488188977e-05, "loss": 1.7104, "step": 340 }, { "epoch": 0.14750331179539863, "grad_norm": 13.521794319152832, "learning_rate": 1.7235345581802277e-05, "loss": 1.8039, "step": 341 }, { "epoch": 0.1479358728270567, "grad_norm": 13.772107124328613, "learning_rate": 1.7226596675415576e-05, "loss": 1.7705, "step": 342 }, { "epoch": 0.14836843385871476, "grad_norm": 12.936812400817871, "learning_rate": 1.721784776902887e-05, "loss": 1.6975, "step": 343 }, { "epoch": 0.14880099489037282, "grad_norm": 13.462157249450684, "learning_rate": 1.720909886264217e-05, "loss": 1.7893, "step": 344 }, { "epoch": 0.14923355592203089, "grad_norm": 12.636610984802246, "learning_rate": 1.720034995625547e-05, "loss": 1.7366, "step": 345 }, { "epoch": 0.14966611695368892, "grad_norm": 12.809752464294434, "learning_rate": 1.7191601049868766e-05, "loss": 1.7415, "step": 346 }, { "epoch": 0.15009867798534698, "grad_norm": 13.976734161376953, "learning_rate": 1.718285214348207e-05, "loss": 1.735, "step": 347 }, { "epoch": 0.15053123901700505, "grad_norm": 12.980571746826172, "learning_rate": 1.7174103237095364e-05, "loss": 1.7759, "step": 348 }, { "epoch": 0.1509638000486631, "grad_norm": 15.27466106414795, "learning_rate": 1.7165354330708663e-05, "loss": 1.7244, "step": 349 }, { "epoch": 0.15139636108032117, "grad_norm": 14.315723419189453, "learning_rate": 1.7156605424321963e-05, "loss": 1.7684, "step": 350 }, { "epoch": 0.15182892211197924, "grad_norm": 13.90048599243164, "learning_rate": 1.714785651793526e-05, "loss": 1.7475, "step": 351 }, { "epoch": 0.1522614831436373, "grad_norm": 13.61722469329834, "learning_rate": 1.7139107611548558e-05, "loss": 1.6943, "step": 352 }, { "epoch": 0.15269404417529536, "grad_norm": 14.208736419677734, "learning_rate": 1.7130358705161857e-05, "loss": 1.7269, "step": 353 }, { "epoch": 0.15312660520695343, "grad_norm": 12.213624954223633, "learning_rate": 1.7121609798775153e-05, "loss": 1.7076, "step": 354 }, { "epoch": 0.1535591662386115, "grad_norm": 14.757974624633789, "learning_rate": 1.7112860892388452e-05, "loss": 1.714, "step": 355 }, { "epoch": 0.15399172727026955, "grad_norm": 13.660282135009766, "learning_rate": 1.710411198600175e-05, "loss": 1.7442, "step": 356 }, { "epoch": 0.1544242883019276, "grad_norm": 13.772233963012695, "learning_rate": 1.709536307961505e-05, "loss": 1.7667, "step": 357 }, { "epoch": 0.15485684933358565, "grad_norm": 14.523663520812988, "learning_rate": 1.708661417322835e-05, "loss": 1.6005, "step": 358 }, { "epoch": 0.1552894103652437, "grad_norm": 14.664237976074219, "learning_rate": 1.7077865266841645e-05, "loss": 1.6852, "step": 359 }, { "epoch": 0.15572197139690178, "grad_norm": 14.592924118041992, "learning_rate": 1.7069116360454945e-05, "loss": 1.7847, "step": 360 }, { "epoch": 0.15615453242855984, "grad_norm": 13.490509033203125, "learning_rate": 1.7060367454068244e-05, "loss": 1.7272, "step": 361 }, { "epoch": 0.1565870934602179, "grad_norm": 14.302976608276367, "learning_rate": 1.705161854768154e-05, "loss": 1.6654, "step": 362 }, { "epoch": 0.15701965449187597, "grad_norm": 13.850686073303223, "learning_rate": 1.704286964129484e-05, "loss": 1.6631, "step": 363 }, { "epoch": 0.15745221552353403, "grad_norm": 12.717830657958984, "learning_rate": 1.7034120734908138e-05, "loss": 1.7526, "step": 364 }, { "epoch": 0.1578847765551921, "grad_norm": 14.529204368591309, "learning_rate": 1.7025371828521434e-05, "loss": 1.6707, "step": 365 }, { "epoch": 0.15831733758685015, "grad_norm": 13.817510604858398, "learning_rate": 1.7016622922134736e-05, "loss": 1.7353, "step": 366 }, { "epoch": 0.15874989861850822, "grad_norm": 15.481966018676758, "learning_rate": 1.7007874015748032e-05, "loss": 1.6683, "step": 367 }, { "epoch": 0.15918245965016625, "grad_norm": 14.049205780029297, "learning_rate": 1.699912510936133e-05, "loss": 1.7273, "step": 368 }, { "epoch": 0.15961502068182432, "grad_norm": 14.65227222442627, "learning_rate": 1.699037620297463e-05, "loss": 1.6432, "step": 369 }, { "epoch": 0.16004758171348238, "grad_norm": 14.594015121459961, "learning_rate": 1.6981627296587927e-05, "loss": 1.8021, "step": 370 }, { "epoch": 0.16048014274514044, "grad_norm": 13.285860061645508, "learning_rate": 1.6972878390201226e-05, "loss": 1.6821, "step": 371 }, { "epoch": 0.1609127037767985, "grad_norm": 14.19520378112793, "learning_rate": 1.6964129483814525e-05, "loss": 1.6678, "step": 372 }, { "epoch": 0.16134526480845657, "grad_norm": 14.195395469665527, "learning_rate": 1.695538057742782e-05, "loss": 1.7482, "step": 373 }, { "epoch": 0.16177782584011463, "grad_norm": 14.017548561096191, "learning_rate": 1.694663167104112e-05, "loss": 1.7071, "step": 374 }, { "epoch": 0.1622103868717727, "grad_norm": 14.560656547546387, "learning_rate": 1.693788276465442e-05, "loss": 1.711, "step": 375 }, { "epoch": 0.16264294790343076, "grad_norm": 14.667351722717285, "learning_rate": 1.692913385826772e-05, "loss": 1.6147, "step": 376 }, { "epoch": 0.16307550893508882, "grad_norm": 15.273613929748535, "learning_rate": 1.6920384951881018e-05, "loss": 1.7753, "step": 377 }, { "epoch": 0.16350806996674688, "grad_norm": 15.08775806427002, "learning_rate": 1.6911636045494317e-05, "loss": 1.8437, "step": 378 }, { "epoch": 0.16394063099840492, "grad_norm": 16.35053825378418, "learning_rate": 1.6902887139107613e-05, "loss": 1.6818, "step": 379 }, { "epoch": 0.16437319203006298, "grad_norm": 14.675270080566406, "learning_rate": 1.6894138232720912e-05, "loss": 1.8182, "step": 380 }, { "epoch": 0.16480575306172104, "grad_norm": 12.950959205627441, "learning_rate": 1.688538932633421e-05, "loss": 1.6691, "step": 381 }, { "epoch": 0.1652383140933791, "grad_norm": 15.214534759521484, "learning_rate": 1.6876640419947507e-05, "loss": 1.7198, "step": 382 }, { "epoch": 0.16567087512503717, "grad_norm": 14.145734786987305, "learning_rate": 1.6867891513560806e-05, "loss": 1.8248, "step": 383 }, { "epoch": 0.16610343615669523, "grad_norm": 13.487668991088867, "learning_rate": 1.6859142607174105e-05, "loss": 1.7785, "step": 384 }, { "epoch": 0.1665359971883533, "grad_norm": 15.348024368286133, "learning_rate": 1.68503937007874e-05, "loss": 1.6533, "step": 385 }, { "epoch": 0.16696855822001136, "grad_norm": 13.304719924926758, "learning_rate": 1.6841644794400704e-05, "loss": 1.7462, "step": 386 }, { "epoch": 0.16740111925166942, "grad_norm": 15.1539888381958, "learning_rate": 1.6832895888014e-05, "loss": 1.7745, "step": 387 }, { "epoch": 0.16783368028332749, "grad_norm": 14.753242492675781, "learning_rate": 1.68241469816273e-05, "loss": 1.7696, "step": 388 }, { "epoch": 0.16826624131498555, "grad_norm": 14.071645736694336, "learning_rate": 1.6815398075240598e-05, "loss": 1.7495, "step": 389 }, { "epoch": 0.16869880234664358, "grad_norm": 13.17452335357666, "learning_rate": 1.6806649168853894e-05, "loss": 1.6654, "step": 390 }, { "epoch": 0.16913136337830165, "grad_norm": 13.739713668823242, "learning_rate": 1.6797900262467193e-05, "loss": 1.871, "step": 391 }, { "epoch": 0.1695639244099597, "grad_norm": 14.904608726501465, "learning_rate": 1.6789151356080492e-05, "loss": 1.7308, "step": 392 }, { "epoch": 0.16999648544161777, "grad_norm": 13.76181697845459, "learning_rate": 1.6780402449693788e-05, "loss": 1.7218, "step": 393 }, { "epoch": 0.17042904647327584, "grad_norm": 15.682334899902344, "learning_rate": 1.6771653543307087e-05, "loss": 1.7135, "step": 394 }, { "epoch": 0.1708616075049339, "grad_norm": 14.770147323608398, "learning_rate": 1.6762904636920386e-05, "loss": 1.6647, "step": 395 }, { "epoch": 0.17129416853659196, "grad_norm": 14.868924140930176, "learning_rate": 1.6754155730533686e-05, "loss": 1.8061, "step": 396 }, { "epoch": 0.17172672956825002, "grad_norm": 13.417051315307617, "learning_rate": 1.6745406824146985e-05, "loss": 1.7588, "step": 397 }, { "epoch": 0.1721592905999081, "grad_norm": 15.186949729919434, "learning_rate": 1.673665791776028e-05, "loss": 1.7662, "step": 398 }, { "epoch": 0.17259185163156615, "grad_norm": 14.485387802124023, "learning_rate": 1.672790901137358e-05, "loss": 1.7135, "step": 399 }, { "epoch": 0.17302441266322421, "grad_norm": 14.660444259643555, "learning_rate": 1.671916010498688e-05, "loss": 1.6872, "step": 400 }, { "epoch": 0.17345697369488225, "grad_norm": 16.875911712646484, "learning_rate": 1.6710411198600175e-05, "loss": 1.6135, "step": 401 }, { "epoch": 0.1738895347265403, "grad_norm": 15.121524810791016, "learning_rate": 1.6701662292213474e-05, "loss": 1.6156, "step": 402 }, { "epoch": 0.17432209575819838, "grad_norm": 14.13774585723877, "learning_rate": 1.6692913385826773e-05, "loss": 1.7311, "step": 403 }, { "epoch": 0.17475465678985644, "grad_norm": 15.118247032165527, "learning_rate": 1.668416447944007e-05, "loss": 1.7038, "step": 404 }, { "epoch": 0.1751872178215145, "grad_norm": 17.146011352539062, "learning_rate": 1.667541557305337e-05, "loss": 1.6302, "step": 405 }, { "epoch": 0.17561977885317256, "grad_norm": 13.198725700378418, "learning_rate": 1.6666666666666667e-05, "loss": 1.7395, "step": 406 }, { "epoch": 0.17605233988483063, "grad_norm": 14.39787483215332, "learning_rate": 1.6657917760279967e-05, "loss": 1.7715, "step": 407 }, { "epoch": 0.1764849009164887, "grad_norm": 14.981849670410156, "learning_rate": 1.6649168853893266e-05, "loss": 1.6916, "step": 408 }, { "epoch": 0.17691746194814675, "grad_norm": 14.908326148986816, "learning_rate": 1.6640419947506562e-05, "loss": 1.7563, "step": 409 }, { "epoch": 0.17735002297980482, "grad_norm": 14.553082466125488, "learning_rate": 1.663167104111986e-05, "loss": 1.7202, "step": 410 }, { "epoch": 0.17778258401146288, "grad_norm": 13.793340682983398, "learning_rate": 1.662292213473316e-05, "loss": 1.7678, "step": 411 }, { "epoch": 0.17821514504312091, "grad_norm": 16.31795310974121, "learning_rate": 1.6614173228346456e-05, "loss": 1.8089, "step": 412 }, { "epoch": 0.17864770607477898, "grad_norm": 17.318607330322266, "learning_rate": 1.6605424321959755e-05, "loss": 1.6696, "step": 413 }, { "epoch": 0.17908026710643704, "grad_norm": 13.9689302444458, "learning_rate": 1.6596675415573054e-05, "loss": 1.7372, "step": 414 }, { "epoch": 0.1795128281380951, "grad_norm": 14.510354995727539, "learning_rate": 1.6587926509186354e-05, "loss": 1.6597, "step": 415 }, { "epoch": 0.17994538916975317, "grad_norm": 15.901954650878906, "learning_rate": 1.6579177602799653e-05, "loss": 1.7872, "step": 416 }, { "epoch": 0.18037795020141123, "grad_norm": 13.021835327148438, "learning_rate": 1.657042869641295e-05, "loss": 1.6739, "step": 417 }, { "epoch": 0.1808105112330693, "grad_norm": 14.94437313079834, "learning_rate": 1.6561679790026248e-05, "loss": 1.6915, "step": 418 }, { "epoch": 0.18124307226472736, "grad_norm": 14.61279296875, "learning_rate": 1.6552930883639547e-05, "loss": 1.5963, "step": 419 }, { "epoch": 0.18167563329638542, "grad_norm": 16.396753311157227, "learning_rate": 1.6544181977252843e-05, "loss": 1.702, "step": 420 }, { "epoch": 0.18210819432804348, "grad_norm": 16.830820083618164, "learning_rate": 1.6535433070866142e-05, "loss": 1.8108, "step": 421 }, { "epoch": 0.18254075535970155, "grad_norm": 15.551237106323242, "learning_rate": 1.652668416447944e-05, "loss": 1.7831, "step": 422 }, { "epoch": 0.18297331639135958, "grad_norm": 15.525558471679688, "learning_rate": 1.651793525809274e-05, "loss": 1.6718, "step": 423 }, { "epoch": 0.18340587742301764, "grad_norm": 12.82323169708252, "learning_rate": 1.650918635170604e-05, "loss": 1.642, "step": 424 }, { "epoch": 0.1838384384546757, "grad_norm": 15.793924331665039, "learning_rate": 1.650043744531934e-05, "loss": 1.6871, "step": 425 }, { "epoch": 0.18427099948633377, "grad_norm": 14.773061752319336, "learning_rate": 1.6491688538932635e-05, "loss": 1.6956, "step": 426 }, { "epoch": 0.18470356051799183, "grad_norm": 16.91400909423828, "learning_rate": 1.6482939632545934e-05, "loss": 1.6336, "step": 427 }, { "epoch": 0.1851361215496499, "grad_norm": 15.226611137390137, "learning_rate": 1.6474190726159233e-05, "loss": 1.7333, "step": 428 }, { "epoch": 0.18556868258130796, "grad_norm": 16.962453842163086, "learning_rate": 1.646544181977253e-05, "loss": 1.6063, "step": 429 }, { "epoch": 0.18600124361296602, "grad_norm": 16.48338508605957, "learning_rate": 1.6456692913385828e-05, "loss": 1.7043, "step": 430 }, { "epoch": 0.18643380464462408, "grad_norm": 14.127233505249023, "learning_rate": 1.6447944006999127e-05, "loss": 1.709, "step": 431 }, { "epoch": 0.18686636567628215, "grad_norm": 15.949320793151855, "learning_rate": 1.6439195100612423e-05, "loss": 1.7161, "step": 432 }, { "epoch": 0.1872989267079402, "grad_norm": 15.113992691040039, "learning_rate": 1.6430446194225722e-05, "loss": 1.7378, "step": 433 }, { "epoch": 0.18773148773959825, "grad_norm": 15.371466636657715, "learning_rate": 1.642169728783902e-05, "loss": 1.6978, "step": 434 }, { "epoch": 0.1881640487712563, "grad_norm": 15.309712409973145, "learning_rate": 1.641294838145232e-05, "loss": 1.7107, "step": 435 }, { "epoch": 0.18859660980291437, "grad_norm": 14.199424743652344, "learning_rate": 1.640419947506562e-05, "loss": 1.6976, "step": 436 }, { "epoch": 0.18902917083457244, "grad_norm": 16.60711097717285, "learning_rate": 1.6395450568678916e-05, "loss": 1.5963, "step": 437 }, { "epoch": 0.1894617318662305, "grad_norm": 14.320137023925781, "learning_rate": 1.6386701662292215e-05, "loss": 1.7274, "step": 438 }, { "epoch": 0.18989429289788856, "grad_norm": 15.285809516906738, "learning_rate": 1.6377952755905514e-05, "loss": 1.7584, "step": 439 }, { "epoch": 0.19032685392954662, "grad_norm": 15.035262107849121, "learning_rate": 1.636920384951881e-05, "loss": 1.6926, "step": 440 }, { "epoch": 0.1907594149612047, "grad_norm": 15.250046730041504, "learning_rate": 1.636045494313211e-05, "loss": 1.7165, "step": 441 }, { "epoch": 0.19119197599286275, "grad_norm": 17.97435188293457, "learning_rate": 1.635170603674541e-05, "loss": 1.8039, "step": 442 }, { "epoch": 0.1916245370245208, "grad_norm": 13.659225463867188, "learning_rate": 1.6342957130358708e-05, "loss": 1.6173, "step": 443 }, { "epoch": 0.19205709805617888, "grad_norm": 15.350072860717773, "learning_rate": 1.6334208223972007e-05, "loss": 1.6974, "step": 444 }, { "epoch": 0.1924896590878369, "grad_norm": 16.155649185180664, "learning_rate": 1.6325459317585303e-05, "loss": 1.6537, "step": 445 }, { "epoch": 0.19292222011949497, "grad_norm": 14.439220428466797, "learning_rate": 1.6316710411198602e-05, "loss": 1.7225, "step": 446 }, { "epoch": 0.19335478115115304, "grad_norm": 17.609535217285156, "learning_rate": 1.63079615048119e-05, "loss": 1.723, "step": 447 }, { "epoch": 0.1937873421828111, "grad_norm": 17.415800094604492, "learning_rate": 1.6299212598425197e-05, "loss": 1.636, "step": 448 }, { "epoch": 0.19421990321446916, "grad_norm": 15.260915756225586, "learning_rate": 1.6290463692038496e-05, "loss": 1.6597, "step": 449 }, { "epoch": 0.19465246424612723, "grad_norm": 16.445232391357422, "learning_rate": 1.6281714785651795e-05, "loss": 1.7469, "step": 450 }, { "epoch": 0.1950850252777853, "grad_norm": 16.05836296081543, "learning_rate": 1.627296587926509e-05, "loss": 1.651, "step": 451 }, { "epoch": 0.19551758630944335, "grad_norm": 14.436004638671875, "learning_rate": 1.626421697287839e-05, "loss": 1.6635, "step": 452 }, { "epoch": 0.19595014734110142, "grad_norm": 14.610448837280273, "learning_rate": 1.625546806649169e-05, "loss": 1.8475, "step": 453 }, { "epoch": 0.19638270837275948, "grad_norm": 16.329818725585938, "learning_rate": 1.624671916010499e-05, "loss": 1.7341, "step": 454 }, { "epoch": 0.19681526940441754, "grad_norm": 15.847041130065918, "learning_rate": 1.6237970253718288e-05, "loss": 1.721, "step": 455 }, { "epoch": 0.19724783043607558, "grad_norm": 13.708486557006836, "learning_rate": 1.6229221347331584e-05, "loss": 1.7186, "step": 456 }, { "epoch": 0.19768039146773364, "grad_norm": 15.195647239685059, "learning_rate": 1.6220472440944883e-05, "loss": 1.7061, "step": 457 }, { "epoch": 0.1981129524993917, "grad_norm": 16.739809036254883, "learning_rate": 1.6211723534558182e-05, "loss": 1.6949, "step": 458 }, { "epoch": 0.19854551353104977, "grad_norm": 15.860810279846191, "learning_rate": 1.6202974628171478e-05, "loss": 1.654, "step": 459 }, { "epoch": 0.19897807456270783, "grad_norm": 16.588729858398438, "learning_rate": 1.6194225721784777e-05, "loss": 1.6778, "step": 460 }, { "epoch": 0.1994106355943659, "grad_norm": 15.871442794799805, "learning_rate": 1.6185476815398076e-05, "loss": 1.7289, "step": 461 }, { "epoch": 0.19984319662602396, "grad_norm": 15.373008728027344, "learning_rate": 1.6176727909011372e-05, "loss": 1.6259, "step": 462 }, { "epoch": 0.20027575765768202, "grad_norm": 14.221921920776367, "learning_rate": 1.6167979002624675e-05, "loss": 1.6892, "step": 463 }, { "epoch": 0.20070831868934008, "grad_norm": 17.429065704345703, "learning_rate": 1.615923009623797e-05, "loss": 1.724, "step": 464 }, { "epoch": 0.20114087972099814, "grad_norm": 20.687255859375, "learning_rate": 1.615048118985127e-05, "loss": 1.6504, "step": 465 }, { "epoch": 0.2015734407526562, "grad_norm": 15.03296947479248, "learning_rate": 1.614173228346457e-05, "loss": 1.6124, "step": 466 }, { "epoch": 0.20200600178431424, "grad_norm": 16.98514175415039, "learning_rate": 1.6132983377077865e-05, "loss": 1.78, "step": 467 }, { "epoch": 0.2024385628159723, "grad_norm": 14.70289134979248, "learning_rate": 1.6124234470691164e-05, "loss": 1.7044, "step": 468 }, { "epoch": 0.20287112384763037, "grad_norm": 14.248015403747559, "learning_rate": 1.6115485564304463e-05, "loss": 1.7596, "step": 469 }, { "epoch": 0.20330368487928843, "grad_norm": 15.52908706665039, "learning_rate": 1.6106736657917763e-05, "loss": 1.7105, "step": 470 }, { "epoch": 0.2037362459109465, "grad_norm": 18.255674362182617, "learning_rate": 1.609798775153106e-05, "loss": 1.7161, "step": 471 }, { "epoch": 0.20416880694260456, "grad_norm": 22.3636474609375, "learning_rate": 1.608923884514436e-05, "loss": 1.5983, "step": 472 }, { "epoch": 0.20460136797426262, "grad_norm": 17.28295135498047, "learning_rate": 1.6080489938757657e-05, "loss": 1.6615, "step": 473 }, { "epoch": 0.20503392900592068, "grad_norm": 13.663941383361816, "learning_rate": 1.6071741032370956e-05, "loss": 1.6207, "step": 474 }, { "epoch": 0.20546649003757875, "grad_norm": 14.511791229248047, "learning_rate": 1.6062992125984255e-05, "loss": 1.7379, "step": 475 }, { "epoch": 0.2058990510692368, "grad_norm": 15.742350578308105, "learning_rate": 1.605424321959755e-05, "loss": 1.7579, "step": 476 }, { "epoch": 0.20633161210089487, "grad_norm": 15.552555084228516, "learning_rate": 1.604549431321085e-05, "loss": 1.641, "step": 477 }, { "epoch": 0.2067641731325529, "grad_norm": 21.889766693115234, "learning_rate": 1.603674540682415e-05, "loss": 1.7403, "step": 478 }, { "epoch": 0.20719673416421097, "grad_norm": 20.306562423706055, "learning_rate": 1.6027996500437445e-05, "loss": 1.7009, "step": 479 }, { "epoch": 0.20762929519586903, "grad_norm": 18.12723731994629, "learning_rate": 1.6019247594050744e-05, "loss": 1.6979, "step": 480 }, { "epoch": 0.2080618562275271, "grad_norm": 14.94935417175293, "learning_rate": 1.6010498687664044e-05, "loss": 1.6484, "step": 481 }, { "epoch": 0.20849441725918516, "grad_norm": 14.743106842041016, "learning_rate": 1.6001749781277343e-05, "loss": 1.6963, "step": 482 }, { "epoch": 0.20892697829084322, "grad_norm": 14.982324600219727, "learning_rate": 1.5993000874890642e-05, "loss": 1.6868, "step": 483 }, { "epoch": 0.2093595393225013, "grad_norm": 16.85517692565918, "learning_rate": 1.5984251968503938e-05, "loss": 1.5787, "step": 484 }, { "epoch": 0.20979210035415935, "grad_norm": 15.209792137145996, "learning_rate": 1.5975503062117237e-05, "loss": 1.7086, "step": 485 }, { "epoch": 0.2102246613858174, "grad_norm": 14.563628196716309, "learning_rate": 1.5966754155730536e-05, "loss": 1.7039, "step": 486 }, { "epoch": 0.21065722241747548, "grad_norm": 15.1539945602417, "learning_rate": 1.5958005249343832e-05, "loss": 1.6866, "step": 487 }, { "epoch": 0.21108978344913354, "grad_norm": 19.112892150878906, "learning_rate": 1.594925634295713e-05, "loss": 1.5955, "step": 488 }, { "epoch": 0.21152234448079157, "grad_norm": 17.022937774658203, "learning_rate": 1.594050743657043e-05, "loss": 1.7138, "step": 489 }, { "epoch": 0.21195490551244964, "grad_norm": 18.67705535888672, "learning_rate": 1.5931758530183726e-05, "loss": 1.7335, "step": 490 }, { "epoch": 0.2123874665441077, "grad_norm": 17.58185577392578, "learning_rate": 1.592300962379703e-05, "loss": 1.6535, "step": 491 }, { "epoch": 0.21282002757576576, "grad_norm": 16.525270462036133, "learning_rate": 1.5914260717410325e-05, "loss": 1.6493, "step": 492 }, { "epoch": 0.21325258860742383, "grad_norm": 18.45403289794922, "learning_rate": 1.5905511811023624e-05, "loss": 1.6937, "step": 493 }, { "epoch": 0.2136851496390819, "grad_norm": 18.30523681640625, "learning_rate": 1.5896762904636923e-05, "loss": 1.6143, "step": 494 }, { "epoch": 0.21411771067073995, "grad_norm": 17.691091537475586, "learning_rate": 1.588801399825022e-05, "loss": 1.6493, "step": 495 }, { "epoch": 0.21455027170239802, "grad_norm": 15.201132774353027, "learning_rate": 1.5879265091863518e-05, "loss": 1.6753, "step": 496 }, { "epoch": 0.21498283273405608, "grad_norm": 17.74352264404297, "learning_rate": 1.5870516185476817e-05, "loss": 1.7362, "step": 497 }, { "epoch": 0.21541539376571414, "grad_norm": 16.426006317138672, "learning_rate": 1.5861767279090113e-05, "loss": 1.5865, "step": 498 }, { "epoch": 0.2158479547973722, "grad_norm": 18.481365203857422, "learning_rate": 1.5853018372703412e-05, "loss": 1.7536, "step": 499 }, { "epoch": 0.21628051582903024, "grad_norm": 16.491756439208984, "learning_rate": 1.584426946631671e-05, "loss": 1.7682, "step": 500 }, { "epoch": 0.2167130768606883, "grad_norm": 15.862979888916016, "learning_rate": 1.583552055993001e-05, "loss": 1.6539, "step": 501 }, { "epoch": 0.21714563789234637, "grad_norm": 14.549097061157227, "learning_rate": 1.582677165354331e-05, "loss": 1.7378, "step": 502 }, { "epoch": 0.21757819892400443, "grad_norm": 14.149446487426758, "learning_rate": 1.5818022747156606e-05, "loss": 1.6905, "step": 503 }, { "epoch": 0.2180107599556625, "grad_norm": 17.096704483032227, "learning_rate": 1.5809273840769905e-05, "loss": 1.5642, "step": 504 }, { "epoch": 0.21844332098732056, "grad_norm": 17.932981491088867, "learning_rate": 1.5800524934383204e-05, "loss": 1.6426, "step": 505 }, { "epoch": 0.21887588201897862, "grad_norm": 17.249406814575195, "learning_rate": 1.57917760279965e-05, "loss": 1.6455, "step": 506 }, { "epoch": 0.21930844305063668, "grad_norm": 18.000295639038086, "learning_rate": 1.57830271216098e-05, "loss": 1.6118, "step": 507 }, { "epoch": 0.21974100408229474, "grad_norm": 16.909530639648438, "learning_rate": 1.57742782152231e-05, "loss": 1.6894, "step": 508 }, { "epoch": 0.2201735651139528, "grad_norm": 17.263492584228516, "learning_rate": 1.5765529308836394e-05, "loss": 1.7422, "step": 509 }, { "epoch": 0.22060612614561087, "grad_norm": 16.53858184814453, "learning_rate": 1.5756780402449694e-05, "loss": 1.5943, "step": 510 }, { "epoch": 0.2210386871772689, "grad_norm": 15.582964897155762, "learning_rate": 1.5748031496062993e-05, "loss": 1.568, "step": 511 }, { "epoch": 0.22147124820892697, "grad_norm": 17.556089401245117, "learning_rate": 1.5739282589676292e-05, "loss": 1.7498, "step": 512 }, { "epoch": 0.22190380924058503, "grad_norm": 16.844451904296875, "learning_rate": 1.573053368328959e-05, "loss": 1.6113, "step": 513 }, { "epoch": 0.2223363702722431, "grad_norm": 16.041711807250977, "learning_rate": 1.5721784776902887e-05, "loss": 1.7614, "step": 514 }, { "epoch": 0.22276893130390116, "grad_norm": 17.016220092773438, "learning_rate": 1.5713035870516186e-05, "loss": 1.6991, "step": 515 }, { "epoch": 0.22320149233555922, "grad_norm": 16.927995681762695, "learning_rate": 1.5704286964129485e-05, "loss": 1.6816, "step": 516 }, { "epoch": 0.22363405336721728, "grad_norm": 16.519311904907227, "learning_rate": 1.5695538057742785e-05, "loss": 1.5943, "step": 517 }, { "epoch": 0.22406661439887535, "grad_norm": 18.04831314086914, "learning_rate": 1.568678915135608e-05, "loss": 1.7193, "step": 518 }, { "epoch": 0.2244991754305334, "grad_norm": 18.500019073486328, "learning_rate": 1.567804024496938e-05, "loss": 1.6316, "step": 519 }, { "epoch": 0.22493173646219147, "grad_norm": 18.94154930114746, "learning_rate": 1.566929133858268e-05, "loss": 1.6008, "step": 520 }, { "epoch": 0.22536429749384954, "grad_norm": 15.78384780883789, "learning_rate": 1.5660542432195978e-05, "loss": 1.6177, "step": 521 }, { "epoch": 0.22579685852550757, "grad_norm": 18.01935577392578, "learning_rate": 1.5651793525809277e-05, "loss": 1.7999, "step": 522 }, { "epoch": 0.22622941955716563, "grad_norm": 19.498945236206055, "learning_rate": 1.5643044619422573e-05, "loss": 1.6587, "step": 523 }, { "epoch": 0.2266619805888237, "grad_norm": 16.238536834716797, "learning_rate": 1.5634295713035872e-05, "loss": 1.6589, "step": 524 }, { "epoch": 0.22709454162048176, "grad_norm": 14.878324508666992, "learning_rate": 1.562554680664917e-05, "loss": 1.692, "step": 525 }, { "epoch": 0.22752710265213982, "grad_norm": 15.313729286193848, "learning_rate": 1.5616797900262467e-05, "loss": 1.6564, "step": 526 }, { "epoch": 0.2279596636837979, "grad_norm": 14.713421821594238, "learning_rate": 1.5608048993875766e-05, "loss": 1.6997, "step": 527 }, { "epoch": 0.22839222471545595, "grad_norm": 14.811202049255371, "learning_rate": 1.5599300087489066e-05, "loss": 1.7147, "step": 528 }, { "epoch": 0.228824785747114, "grad_norm": 17.43622589111328, "learning_rate": 1.559055118110236e-05, "loss": 1.6092, "step": 529 }, { "epoch": 0.22925734677877208, "grad_norm": 16.88450813293457, "learning_rate": 1.5581802274715664e-05, "loss": 1.6191, "step": 530 }, { "epoch": 0.22968990781043014, "grad_norm": 17.206796646118164, "learning_rate": 1.557305336832896e-05, "loss": 1.7351, "step": 531 }, { "epoch": 0.2301224688420882, "grad_norm": 18.76306915283203, "learning_rate": 1.556430446194226e-05, "loss": 1.5707, "step": 532 }, { "epoch": 0.23055502987374624, "grad_norm": 15.88653564453125, "learning_rate": 1.555555555555556e-05, "loss": 1.6625, "step": 533 }, { "epoch": 0.2309875909054043, "grad_norm": 17.676189422607422, "learning_rate": 1.5546806649168854e-05, "loss": 1.6742, "step": 534 }, { "epoch": 0.23142015193706236, "grad_norm": 17.690826416015625, "learning_rate": 1.5538057742782153e-05, "loss": 1.6381, "step": 535 }, { "epoch": 0.23185271296872043, "grad_norm": 16.335132598876953, "learning_rate": 1.5529308836395453e-05, "loss": 1.5988, "step": 536 }, { "epoch": 0.2322852740003785, "grad_norm": 17.346731185913086, "learning_rate": 1.552055993000875e-05, "loss": 1.687, "step": 537 }, { "epoch": 0.23271783503203655, "grad_norm": 16.654361724853516, "learning_rate": 1.5511811023622048e-05, "loss": 1.6375, "step": 538 }, { "epoch": 0.23315039606369461, "grad_norm": 17.761816024780273, "learning_rate": 1.5503062117235347e-05, "loss": 1.6785, "step": 539 }, { "epoch": 0.23358295709535268, "grad_norm": 16.973031997680664, "learning_rate": 1.5494313210848646e-05, "loss": 1.6814, "step": 540 }, { "epoch": 0.23401551812701074, "grad_norm": 16.909561157226562, "learning_rate": 1.5485564304461945e-05, "loss": 1.6855, "step": 541 }, { "epoch": 0.2344480791586688, "grad_norm": 17.03185272216797, "learning_rate": 1.547681539807524e-05, "loss": 1.6887, "step": 542 }, { "epoch": 0.23488064019032687, "grad_norm": 17.3386173248291, "learning_rate": 1.546806649168854e-05, "loss": 1.6587, "step": 543 }, { "epoch": 0.2353132012219849, "grad_norm": 16.984390258789062, "learning_rate": 1.545931758530184e-05, "loss": 1.5812, "step": 544 }, { "epoch": 0.23574576225364297, "grad_norm": 16.079471588134766, "learning_rate": 1.5450568678915135e-05, "loss": 1.7236, "step": 545 }, { "epoch": 0.23617832328530103, "grad_norm": 16.26591682434082, "learning_rate": 1.5441819772528434e-05, "loss": 1.6052, "step": 546 }, { "epoch": 0.2366108843169591, "grad_norm": 17.542015075683594, "learning_rate": 1.5433070866141734e-05, "loss": 1.5683, "step": 547 }, { "epoch": 0.23704344534861715, "grad_norm": 19.87151527404785, "learning_rate": 1.542432195975503e-05, "loss": 1.5891, "step": 548 }, { "epoch": 0.23747600638027522, "grad_norm": 17.004043579101562, "learning_rate": 1.5415573053368332e-05, "loss": 1.6065, "step": 549 }, { "epoch": 0.23790856741193328, "grad_norm": 16.69712257385254, "learning_rate": 1.5406824146981628e-05, "loss": 1.6858, "step": 550 }, { "epoch": 0.23834112844359134, "grad_norm": 17.42692756652832, "learning_rate": 1.5398075240594927e-05, "loss": 1.7016, "step": 551 }, { "epoch": 0.2387736894752494, "grad_norm": 15.988450050354004, "learning_rate": 1.5389326334208226e-05, "loss": 1.592, "step": 552 }, { "epoch": 0.23920625050690747, "grad_norm": 16.765094757080078, "learning_rate": 1.5380577427821522e-05, "loss": 1.5969, "step": 553 }, { "epoch": 0.23963881153856553, "grad_norm": 16.65338134765625, "learning_rate": 1.537182852143482e-05, "loss": 1.5488, "step": 554 }, { "epoch": 0.24007137257022357, "grad_norm": 20.152639389038086, "learning_rate": 1.536307961504812e-05, "loss": 1.5912, "step": 555 }, { "epoch": 0.24050393360188163, "grad_norm": 17.90691566467285, "learning_rate": 1.5354330708661416e-05, "loss": 1.7089, "step": 556 }, { "epoch": 0.2409364946335397, "grad_norm": 16.137821197509766, "learning_rate": 1.5345581802274716e-05, "loss": 1.6125, "step": 557 }, { "epoch": 0.24136905566519776, "grad_norm": 17.443849563598633, "learning_rate": 1.5336832895888015e-05, "loss": 1.7119, "step": 558 }, { "epoch": 0.24180161669685582, "grad_norm": 19.99116325378418, "learning_rate": 1.5328083989501314e-05, "loss": 1.6325, "step": 559 }, { "epoch": 0.24223417772851388, "grad_norm": 17.063501358032227, "learning_rate": 1.5319335083114613e-05, "loss": 1.6513, "step": 560 }, { "epoch": 0.24266673876017195, "grad_norm": 16.421655654907227, "learning_rate": 1.531058617672791e-05, "loss": 1.683, "step": 561 }, { "epoch": 0.24309929979183, "grad_norm": 20.072221755981445, "learning_rate": 1.5301837270341208e-05, "loss": 1.5857, "step": 562 }, { "epoch": 0.24353186082348807, "grad_norm": 17.94641876220703, "learning_rate": 1.5293088363954507e-05, "loss": 1.7222, "step": 563 }, { "epoch": 0.24396442185514614, "grad_norm": 18.8425235748291, "learning_rate": 1.5284339457567807e-05, "loss": 1.6236, "step": 564 }, { "epoch": 0.2443969828868042, "grad_norm": 16.484027862548828, "learning_rate": 1.5275590551181102e-05, "loss": 1.6454, "step": 565 }, { "epoch": 0.24482954391846223, "grad_norm": 16.227195739746094, "learning_rate": 1.52668416447944e-05, "loss": 1.5695, "step": 566 }, { "epoch": 0.2452621049501203, "grad_norm": 17.701627731323242, "learning_rate": 1.52580927384077e-05, "loss": 1.723, "step": 567 }, { "epoch": 0.24569466598177836, "grad_norm": 19.588029861450195, "learning_rate": 1.5249343832021e-05, "loss": 1.6525, "step": 568 }, { "epoch": 0.24612722701343642, "grad_norm": 16.687175750732422, "learning_rate": 1.5240594925634298e-05, "loss": 1.6708, "step": 569 }, { "epoch": 0.24655978804509449, "grad_norm": 16.77758026123047, "learning_rate": 1.5231846019247595e-05, "loss": 1.6512, "step": 570 }, { "epoch": 0.24699234907675255, "grad_norm": 16.585819244384766, "learning_rate": 1.5223097112860894e-05, "loss": 1.5813, "step": 571 }, { "epoch": 0.2474249101084106, "grad_norm": 18.844066619873047, "learning_rate": 1.5214348206474192e-05, "loss": 1.6259, "step": 572 }, { "epoch": 0.24785747114006867, "grad_norm": 18.0266170501709, "learning_rate": 1.520559930008749e-05, "loss": 1.5875, "step": 573 }, { "epoch": 0.24829003217172674, "grad_norm": 18.513761520385742, "learning_rate": 1.5196850393700789e-05, "loss": 1.6878, "step": 574 }, { "epoch": 0.2487225932033848, "grad_norm": 16.963220596313477, "learning_rate": 1.5188101487314086e-05, "loss": 1.7002, "step": 575 }, { "epoch": 0.24915515423504286, "grad_norm": 16.365562438964844, "learning_rate": 1.5179352580927385e-05, "loss": 1.6331, "step": 576 }, { "epoch": 0.2495877152667009, "grad_norm": 18.376493453979492, "learning_rate": 1.5170603674540683e-05, "loss": 1.5584, "step": 577 }, { "epoch": 0.250020276298359, "grad_norm": 15.05932903289795, "learning_rate": 1.5161854768153984e-05, "loss": 1.6453, "step": 578 }, { "epoch": 0.25045283733001705, "grad_norm": 16.900829315185547, "learning_rate": 1.5153105861767281e-05, "loss": 1.6164, "step": 579 }, { "epoch": 0.2508853983616751, "grad_norm": 18.5715389251709, "learning_rate": 1.5144356955380579e-05, "loss": 1.7586, "step": 580 }, { "epoch": 0.2513179593933332, "grad_norm": 16.25688934326172, "learning_rate": 1.5135608048993878e-05, "loss": 1.5743, "step": 581 }, { "epoch": 0.2517505204249912, "grad_norm": 16.32343292236328, "learning_rate": 1.5126859142607175e-05, "loss": 1.6409, "step": 582 }, { "epoch": 0.25218308145664925, "grad_norm": 18.865724563598633, "learning_rate": 1.5118110236220473e-05, "loss": 1.5924, "step": 583 }, { "epoch": 0.2526156424883073, "grad_norm": 16.306575775146484, "learning_rate": 1.5109361329833772e-05, "loss": 1.6264, "step": 584 }, { "epoch": 0.2530482035199654, "grad_norm": 19.097639083862305, "learning_rate": 1.510061242344707e-05, "loss": 1.6059, "step": 585 }, { "epoch": 0.25348076455162344, "grad_norm": 16.674352645874023, "learning_rate": 1.5091863517060367e-05, "loss": 1.7933, "step": 586 }, { "epoch": 0.2539133255832815, "grad_norm": 19.233652114868164, "learning_rate": 1.5083114610673668e-05, "loss": 1.5857, "step": 587 }, { "epoch": 0.25434588661493956, "grad_norm": 18.98441505432129, "learning_rate": 1.5074365704286966e-05, "loss": 1.709, "step": 588 }, { "epoch": 0.25477844764659763, "grad_norm": 18.186471939086914, "learning_rate": 1.5065616797900265e-05, "loss": 1.6863, "step": 589 }, { "epoch": 0.2552110086782557, "grad_norm": 18.54801368713379, "learning_rate": 1.5056867891513562e-05, "loss": 1.6825, "step": 590 }, { "epoch": 0.25564356970991375, "grad_norm": 16.038410186767578, "learning_rate": 1.504811898512686e-05, "loss": 1.6853, "step": 591 }, { "epoch": 0.2560761307415718, "grad_norm": 17.528871536254883, "learning_rate": 1.5039370078740159e-05, "loss": 1.7077, "step": 592 }, { "epoch": 0.2565086917732299, "grad_norm": 15.960848808288574, "learning_rate": 1.5030621172353457e-05, "loss": 1.5823, "step": 593 }, { "epoch": 0.25694125280488794, "grad_norm": 18.292795181274414, "learning_rate": 1.5021872265966754e-05, "loss": 1.5904, "step": 594 }, { "epoch": 0.257373813836546, "grad_norm": 18.43050193786621, "learning_rate": 1.5013123359580053e-05, "loss": 1.6279, "step": 595 }, { "epoch": 0.25780637486820407, "grad_norm": 18.099613189697266, "learning_rate": 1.500437445319335e-05, "loss": 1.6725, "step": 596 }, { "epoch": 0.25823893589986213, "grad_norm": 17.15315055847168, "learning_rate": 1.4995625546806652e-05, "loss": 1.6643, "step": 597 }, { "epoch": 0.2586714969315202, "grad_norm": 15.87186050415039, "learning_rate": 1.498687664041995e-05, "loss": 1.6733, "step": 598 }, { "epoch": 0.25910405796317826, "grad_norm": 15.417596817016602, "learning_rate": 1.4978127734033248e-05, "loss": 1.743, "step": 599 }, { "epoch": 0.2595366189948363, "grad_norm": 16.56854248046875, "learning_rate": 1.4969378827646546e-05, "loss": 1.7008, "step": 600 }, { "epoch": 0.2599691800264944, "grad_norm": 16.416122436523438, "learning_rate": 1.4960629921259843e-05, "loss": 1.7122, "step": 601 }, { "epoch": 0.26040174105815245, "grad_norm": 16.724794387817383, "learning_rate": 1.4951881014873143e-05, "loss": 1.7211, "step": 602 }, { "epoch": 0.2608343020898105, "grad_norm": 16.47406768798828, "learning_rate": 1.494313210848644e-05, "loss": 1.6575, "step": 603 }, { "epoch": 0.2612668631214685, "grad_norm": 18.724267959594727, "learning_rate": 1.4934383202099738e-05, "loss": 1.6323, "step": 604 }, { "epoch": 0.2616994241531266, "grad_norm": 17.24701499938965, "learning_rate": 1.4925634295713037e-05, "loss": 1.7832, "step": 605 }, { "epoch": 0.26213198518478464, "grad_norm": 18.684057235717773, "learning_rate": 1.4916885389326334e-05, "loss": 1.5967, "step": 606 }, { "epoch": 0.2625645462164427, "grad_norm": 16.600284576416016, "learning_rate": 1.4908136482939635e-05, "loss": 1.5627, "step": 607 }, { "epoch": 0.26299710724810077, "grad_norm": 17.85321044921875, "learning_rate": 1.4899387576552933e-05, "loss": 1.7005, "step": 608 }, { "epoch": 0.26342966827975883, "grad_norm": 16.918190002441406, "learning_rate": 1.489063867016623e-05, "loss": 1.6876, "step": 609 }, { "epoch": 0.2638622293114169, "grad_norm": 16.909236907958984, "learning_rate": 1.488188976377953e-05, "loss": 1.5946, "step": 610 }, { "epoch": 0.26429479034307496, "grad_norm": 17.064176559448242, "learning_rate": 1.4873140857392827e-05, "loss": 1.7075, "step": 611 }, { "epoch": 0.264727351374733, "grad_norm": 19.011877059936523, "learning_rate": 1.4864391951006125e-05, "loss": 1.765, "step": 612 }, { "epoch": 0.2651599124063911, "grad_norm": 17.99385643005371, "learning_rate": 1.4855643044619424e-05, "loss": 1.6925, "step": 613 }, { "epoch": 0.26559247343804915, "grad_norm": 17.17367172241211, "learning_rate": 1.4846894138232721e-05, "loss": 1.5354, "step": 614 }, { "epoch": 0.2660250344697072, "grad_norm": 19.10651206970215, "learning_rate": 1.4838145231846019e-05, "loss": 1.5447, "step": 615 }, { "epoch": 0.2664575955013653, "grad_norm": 18.000473022460938, "learning_rate": 1.482939632545932e-05, "loss": 1.5983, "step": 616 }, { "epoch": 0.26689015653302334, "grad_norm": 17.769773483276367, "learning_rate": 1.4820647419072617e-05, "loss": 1.6972, "step": 617 }, { "epoch": 0.2673227175646814, "grad_norm": 17.360748291015625, "learning_rate": 1.4811898512685916e-05, "loss": 1.6898, "step": 618 }, { "epoch": 0.26775527859633946, "grad_norm": 18.90128517150879, "learning_rate": 1.4803149606299214e-05, "loss": 1.5293, "step": 619 }, { "epoch": 0.2681878396279975, "grad_norm": 18.458005905151367, "learning_rate": 1.4794400699912513e-05, "loss": 1.6708, "step": 620 }, { "epoch": 0.2686204006596556, "grad_norm": 16.414493560791016, "learning_rate": 1.478565179352581e-05, "loss": 1.5625, "step": 621 }, { "epoch": 0.26905296169131365, "grad_norm": 17.863723754882812, "learning_rate": 1.4776902887139108e-05, "loss": 1.6055, "step": 622 }, { "epoch": 0.2694855227229717, "grad_norm": 19.638256072998047, "learning_rate": 1.4768153980752407e-05, "loss": 1.6344, "step": 623 }, { "epoch": 0.2699180837546298, "grad_norm": 19.117496490478516, "learning_rate": 1.4759405074365705e-05, "loss": 1.6323, "step": 624 }, { "epoch": 0.27035064478628784, "grad_norm": 17.23756217956543, "learning_rate": 1.4750656167979002e-05, "loss": 1.6586, "step": 625 }, { "epoch": 0.27078320581794585, "grad_norm": 17.25014877319336, "learning_rate": 1.4741907261592303e-05, "loss": 1.6476, "step": 626 }, { "epoch": 0.2712157668496039, "grad_norm": 22.015825271606445, "learning_rate": 1.47331583552056e-05, "loss": 1.5858, "step": 627 }, { "epoch": 0.271648327881262, "grad_norm": 19.36150360107422, "learning_rate": 1.47244094488189e-05, "loss": 1.6196, "step": 628 }, { "epoch": 0.27208088891292004, "grad_norm": 17.298595428466797, "learning_rate": 1.4715660542432198e-05, "loss": 1.6995, "step": 629 }, { "epoch": 0.2725134499445781, "grad_norm": 17.826095581054688, "learning_rate": 1.4706911636045495e-05, "loss": 1.7035, "step": 630 }, { "epoch": 0.27294601097623616, "grad_norm": 17.059179306030273, "learning_rate": 1.4698162729658794e-05, "loss": 1.603, "step": 631 }, { "epoch": 0.2733785720078942, "grad_norm": 18.210107803344727, "learning_rate": 1.4689413823272092e-05, "loss": 1.7385, "step": 632 }, { "epoch": 0.2738111330395523, "grad_norm": 18.413015365600586, "learning_rate": 1.468066491688539e-05, "loss": 1.588, "step": 633 }, { "epoch": 0.27424369407121035, "grad_norm": 17.428800582885742, "learning_rate": 1.4671916010498688e-05, "loss": 1.673, "step": 634 }, { "epoch": 0.2746762551028684, "grad_norm": 17.01789665222168, "learning_rate": 1.4663167104111988e-05, "loss": 1.6085, "step": 635 }, { "epoch": 0.2751088161345265, "grad_norm": 16.585988998413086, "learning_rate": 1.4654418197725287e-05, "loss": 1.7314, "step": 636 }, { "epoch": 0.27554137716618454, "grad_norm": 18.863544464111328, "learning_rate": 1.4645669291338584e-05, "loss": 1.685, "step": 637 }, { "epoch": 0.2759739381978426, "grad_norm": 18.15181541442871, "learning_rate": 1.4636920384951882e-05, "loss": 1.7304, "step": 638 }, { "epoch": 0.27640649922950067, "grad_norm": 18.61130142211914, "learning_rate": 1.4628171478565181e-05, "loss": 1.5327, "step": 639 }, { "epoch": 0.27683906026115873, "grad_norm": 17.54844856262207, "learning_rate": 1.4619422572178479e-05, "loss": 1.6265, "step": 640 }, { "epoch": 0.2772716212928168, "grad_norm": 17.166112899780273, "learning_rate": 1.4610673665791776e-05, "loss": 1.6124, "step": 641 }, { "epoch": 0.27770418232447486, "grad_norm": 19.315031051635742, "learning_rate": 1.4601924759405075e-05, "loss": 1.5918, "step": 642 }, { "epoch": 0.2781367433561329, "grad_norm": 18.814680099487305, "learning_rate": 1.4593175853018373e-05, "loss": 1.5706, "step": 643 }, { "epoch": 0.278569304387791, "grad_norm": 17.746023178100586, "learning_rate": 1.4584426946631672e-05, "loss": 1.7087, "step": 644 }, { "epoch": 0.27900186541944905, "grad_norm": 21.044361114501953, "learning_rate": 1.4575678040244971e-05, "loss": 1.6326, "step": 645 }, { "epoch": 0.2794344264511071, "grad_norm": 20.996479034423828, "learning_rate": 1.456692913385827e-05, "loss": 1.5941, "step": 646 }, { "epoch": 0.2798669874827652, "grad_norm": 18.970081329345703, "learning_rate": 1.4558180227471568e-05, "loss": 1.6649, "step": 647 }, { "epoch": 0.2802995485144232, "grad_norm": 18.267818450927734, "learning_rate": 1.4549431321084865e-05, "loss": 1.6545, "step": 648 }, { "epoch": 0.28073210954608124, "grad_norm": 16.575632095336914, "learning_rate": 1.4540682414698165e-05, "loss": 1.6283, "step": 649 }, { "epoch": 0.2811646705777393, "grad_norm": 17.574951171875, "learning_rate": 1.4531933508311462e-05, "loss": 1.664, "step": 650 }, { "epoch": 0.28159723160939737, "grad_norm": 18.63271713256836, "learning_rate": 1.452318460192476e-05, "loss": 1.6736, "step": 651 }, { "epoch": 0.28202979264105543, "grad_norm": 18.147533416748047, "learning_rate": 1.4514435695538059e-05, "loss": 1.6374, "step": 652 }, { "epoch": 0.2824623536727135, "grad_norm": 17.428810119628906, "learning_rate": 1.4505686789151356e-05, "loss": 1.7221, "step": 653 }, { "epoch": 0.28289491470437156, "grad_norm": 17.664213180541992, "learning_rate": 1.4496937882764654e-05, "loss": 1.6235, "step": 654 }, { "epoch": 0.2833274757360296, "grad_norm": 18.84585952758789, "learning_rate": 1.4488188976377955e-05, "loss": 1.7157, "step": 655 }, { "epoch": 0.2837600367676877, "grad_norm": 19.42424774169922, "learning_rate": 1.4479440069991252e-05, "loss": 1.6526, "step": 656 }, { "epoch": 0.28419259779934575, "grad_norm": 18.132667541503906, "learning_rate": 1.4470691163604552e-05, "loss": 1.7109, "step": 657 }, { "epoch": 0.2846251588310038, "grad_norm": 18.288928985595703, "learning_rate": 1.4461942257217849e-05, "loss": 1.6393, "step": 658 }, { "epoch": 0.2850577198626619, "grad_norm": 15.85251522064209, "learning_rate": 1.4453193350831147e-05, "loss": 1.6375, "step": 659 }, { "epoch": 0.28549028089431994, "grad_norm": 17.576475143432617, "learning_rate": 1.4444444444444446e-05, "loss": 1.6288, "step": 660 }, { "epoch": 0.285922841925978, "grad_norm": 16.68917465209961, "learning_rate": 1.4435695538057743e-05, "loss": 1.5772, "step": 661 }, { "epoch": 0.28635540295763606, "grad_norm": 19.292522430419922, "learning_rate": 1.442694663167104e-05, "loss": 1.6764, "step": 662 }, { "epoch": 0.2867879639892941, "grad_norm": 19.660804748535156, "learning_rate": 1.441819772528434e-05, "loss": 1.7028, "step": 663 }, { "epoch": 0.2872205250209522, "grad_norm": 17.166290283203125, "learning_rate": 1.440944881889764e-05, "loss": 1.5549, "step": 664 }, { "epoch": 0.28765308605261025, "grad_norm": 15.878508567810059, "learning_rate": 1.4400699912510938e-05, "loss": 1.6366, "step": 665 }, { "epoch": 0.2880856470842683, "grad_norm": 16.062185287475586, "learning_rate": 1.4391951006124236e-05, "loss": 1.6623, "step": 666 }, { "epoch": 0.2885182081159264, "grad_norm": 18.430627822875977, "learning_rate": 1.4383202099737535e-05, "loss": 1.7102, "step": 667 }, { "epoch": 0.28895076914758444, "grad_norm": 18.56020736694336, "learning_rate": 1.4374453193350833e-05, "loss": 1.6928, "step": 668 }, { "epoch": 0.2893833301792425, "grad_norm": 77.00767517089844, "learning_rate": 1.436570428696413e-05, "loss": 1.6911, "step": 669 }, { "epoch": 0.2898158912109005, "grad_norm": 18.834728240966797, "learning_rate": 1.435695538057743e-05, "loss": 1.6888, "step": 670 }, { "epoch": 0.2902484522425586, "grad_norm": 17.199195861816406, "learning_rate": 1.4348206474190727e-05, "loss": 1.691, "step": 671 }, { "epoch": 0.29068101327421664, "grad_norm": 18.331899642944336, "learning_rate": 1.4339457567804024e-05, "loss": 1.6973, "step": 672 }, { "epoch": 0.2911135743058747, "grad_norm": 16.407562255859375, "learning_rate": 1.4330708661417324e-05, "loss": 1.6564, "step": 673 }, { "epoch": 0.29154613533753276, "grad_norm": 18.343046188354492, "learning_rate": 1.4321959755030623e-05, "loss": 1.717, "step": 674 }, { "epoch": 0.2919786963691908, "grad_norm": 19.638093948364258, "learning_rate": 1.4313210848643922e-05, "loss": 1.7019, "step": 675 }, { "epoch": 0.2924112574008489, "grad_norm": 31.77922821044922, "learning_rate": 1.430446194225722e-05, "loss": 1.6391, "step": 676 }, { "epoch": 0.29284381843250695, "grad_norm": 18.263328552246094, "learning_rate": 1.4295713035870517e-05, "loss": 1.5636, "step": 677 }, { "epoch": 0.293276379464165, "grad_norm": 17.560848236083984, "learning_rate": 1.4286964129483816e-05, "loss": 1.6535, "step": 678 }, { "epoch": 0.2937089404958231, "grad_norm": 18.677249908447266, "learning_rate": 1.4278215223097114e-05, "loss": 1.6181, "step": 679 }, { "epoch": 0.29414150152748114, "grad_norm": 18.061887741088867, "learning_rate": 1.4269466316710411e-05, "loss": 1.5453, "step": 680 }, { "epoch": 0.2945740625591392, "grad_norm": 17.87262535095215, "learning_rate": 1.426071741032371e-05, "loss": 1.6855, "step": 681 }, { "epoch": 0.29500662359079727, "grad_norm": 18.203630447387695, "learning_rate": 1.4251968503937008e-05, "loss": 1.5759, "step": 682 }, { "epoch": 0.29543918462245533, "grad_norm": 17.520793914794922, "learning_rate": 1.4243219597550309e-05, "loss": 1.5989, "step": 683 }, { "epoch": 0.2958717456541134, "grad_norm": 19.39581298828125, "learning_rate": 1.4234470691163606e-05, "loss": 1.674, "step": 684 }, { "epoch": 0.29630430668577146, "grad_norm": 18.8718318939209, "learning_rate": 1.4225721784776904e-05, "loss": 1.5593, "step": 685 }, { "epoch": 0.2967368677174295, "grad_norm": 20.18855094909668, "learning_rate": 1.4216972878390203e-05, "loss": 1.5447, "step": 686 }, { "epoch": 0.2971694287490876, "grad_norm": 18.366270065307617, "learning_rate": 1.42082239720035e-05, "loss": 1.5408, "step": 687 }, { "epoch": 0.29760198978074565, "grad_norm": 20.94564437866211, "learning_rate": 1.4199475065616798e-05, "loss": 1.6166, "step": 688 }, { "epoch": 0.2980345508124037, "grad_norm": 19.897859573364258, "learning_rate": 1.4190726159230097e-05, "loss": 1.5997, "step": 689 }, { "epoch": 0.29846711184406177, "grad_norm": 18.334325790405273, "learning_rate": 1.4181977252843395e-05, "loss": 1.6338, "step": 690 }, { "epoch": 0.29889967287571984, "grad_norm": 19.647480010986328, "learning_rate": 1.4173228346456694e-05, "loss": 1.6537, "step": 691 }, { "epoch": 0.29933223390737784, "grad_norm": 16.615379333496094, "learning_rate": 1.4164479440069992e-05, "loss": 1.6717, "step": 692 }, { "epoch": 0.2997647949390359, "grad_norm": 20.802907943725586, "learning_rate": 1.4155730533683293e-05, "loss": 1.6626, "step": 693 }, { "epoch": 0.30019735597069397, "grad_norm": 17.72242546081543, "learning_rate": 1.414698162729659e-05, "loss": 1.6199, "step": 694 }, { "epoch": 0.30062991700235203, "grad_norm": 17.221155166625977, "learning_rate": 1.4138232720909888e-05, "loss": 1.7148, "step": 695 }, { "epoch": 0.3010624780340101, "grad_norm": 18.239665985107422, "learning_rate": 1.4129483814523187e-05, "loss": 1.654, "step": 696 }, { "epoch": 0.30149503906566816, "grad_norm": 20.456558227539062, "learning_rate": 1.4120734908136484e-05, "loss": 1.6063, "step": 697 }, { "epoch": 0.3019276000973262, "grad_norm": 18.604167938232422, "learning_rate": 1.4111986001749782e-05, "loss": 1.6348, "step": 698 }, { "epoch": 0.3023601611289843, "grad_norm": 17.619157791137695, "learning_rate": 1.4103237095363081e-05, "loss": 1.5949, "step": 699 }, { "epoch": 0.30279272216064235, "grad_norm": 17.160158157348633, "learning_rate": 1.4094488188976379e-05, "loss": 1.608, "step": 700 }, { "epoch": 0.3032252831923004, "grad_norm": 17.992366790771484, "learning_rate": 1.4085739282589676e-05, "loss": 1.6816, "step": 701 }, { "epoch": 0.3036578442239585, "grad_norm": 18.279861450195312, "learning_rate": 1.4076990376202975e-05, "loss": 1.5903, "step": 702 }, { "epoch": 0.30409040525561654, "grad_norm": 17.513811111450195, "learning_rate": 1.4068241469816274e-05, "loss": 1.6141, "step": 703 }, { "epoch": 0.3045229662872746, "grad_norm": 19.576257705688477, "learning_rate": 1.4059492563429574e-05, "loss": 1.5614, "step": 704 }, { "epoch": 0.30495552731893266, "grad_norm": 17.985549926757812, "learning_rate": 1.4050743657042871e-05, "loss": 1.6339, "step": 705 }, { "epoch": 0.3053880883505907, "grad_norm": 20.642486572265625, "learning_rate": 1.4041994750656169e-05, "loss": 1.5741, "step": 706 }, { "epoch": 0.3058206493822488, "grad_norm": 18.464508056640625, "learning_rate": 1.4033245844269468e-05, "loss": 1.6104, "step": 707 }, { "epoch": 0.30625321041390685, "grad_norm": 17.332229614257812, "learning_rate": 1.4024496937882765e-05, "loss": 1.6038, "step": 708 }, { "epoch": 0.3066857714455649, "grad_norm": 21.05571937561035, "learning_rate": 1.4015748031496063e-05, "loss": 1.7033, "step": 709 }, { "epoch": 0.307118332477223, "grad_norm": 18.811159133911133, "learning_rate": 1.4006999125109362e-05, "loss": 1.6677, "step": 710 }, { "epoch": 0.30755089350888104, "grad_norm": 17.237346649169922, "learning_rate": 1.399825021872266e-05, "loss": 1.6081, "step": 711 }, { "epoch": 0.3079834545405391, "grad_norm": 17.864810943603516, "learning_rate": 1.398950131233596e-05, "loss": 1.5892, "step": 712 }, { "epoch": 0.30841601557219717, "grad_norm": 17.532041549682617, "learning_rate": 1.3980752405949258e-05, "loss": 1.5575, "step": 713 }, { "epoch": 0.3088485766038552, "grad_norm": 18.569576263427734, "learning_rate": 1.3972003499562557e-05, "loss": 1.6348, "step": 714 }, { "epoch": 0.30928113763551324, "grad_norm": 18.27239227294922, "learning_rate": 1.3963254593175855e-05, "loss": 1.6954, "step": 715 }, { "epoch": 0.3097136986671713, "grad_norm": 20.6396484375, "learning_rate": 1.3954505686789152e-05, "loss": 1.6909, "step": 716 }, { "epoch": 0.31014625969882936, "grad_norm": 19.61980628967285, "learning_rate": 1.3945756780402451e-05, "loss": 1.6499, "step": 717 }, { "epoch": 0.3105788207304874, "grad_norm": 19.68534278869629, "learning_rate": 1.3937007874015749e-05, "loss": 1.5033, "step": 718 }, { "epoch": 0.3110113817621455, "grad_norm": 22.719221115112305, "learning_rate": 1.3928258967629047e-05, "loss": 1.6344, "step": 719 }, { "epoch": 0.31144394279380355, "grad_norm": 18.73038673400879, "learning_rate": 1.3919510061242346e-05, "loss": 1.6237, "step": 720 }, { "epoch": 0.3118765038254616, "grad_norm": 17.38966941833496, "learning_rate": 1.3910761154855643e-05, "loss": 1.5169, "step": 721 }, { "epoch": 0.3123090648571197, "grad_norm": 23.203266143798828, "learning_rate": 1.3902012248468944e-05, "loss": 1.6486, "step": 722 }, { "epoch": 0.31274162588877774, "grad_norm": 20.895124435424805, "learning_rate": 1.3893263342082242e-05, "loss": 1.5868, "step": 723 }, { "epoch": 0.3131741869204358, "grad_norm": 18.580778121948242, "learning_rate": 1.388451443569554e-05, "loss": 1.5986, "step": 724 }, { "epoch": 0.31360674795209387, "grad_norm": 18.76534652709961, "learning_rate": 1.3875765529308838e-05, "loss": 1.5955, "step": 725 }, { "epoch": 0.31403930898375193, "grad_norm": 19.820236206054688, "learning_rate": 1.3867016622922136e-05, "loss": 1.6397, "step": 726 }, { "epoch": 0.31447187001541, "grad_norm": 18.899864196777344, "learning_rate": 1.3858267716535433e-05, "loss": 1.633, "step": 727 }, { "epoch": 0.31490443104706806, "grad_norm": 19.738550186157227, "learning_rate": 1.3849518810148733e-05, "loss": 1.6484, "step": 728 }, { "epoch": 0.3153369920787261, "grad_norm": 20.931293487548828, "learning_rate": 1.384076990376203e-05, "loss": 1.6189, "step": 729 }, { "epoch": 0.3157695531103842, "grad_norm": 18.17987060546875, "learning_rate": 1.3832020997375328e-05, "loss": 1.6628, "step": 730 }, { "epoch": 0.31620211414204225, "grad_norm": 19.147859573364258, "learning_rate": 1.3823272090988629e-05, "loss": 1.5953, "step": 731 }, { "epoch": 0.3166346751737003, "grad_norm": 18.06837272644043, "learning_rate": 1.3814523184601926e-05, "loss": 1.5991, "step": 732 }, { "epoch": 0.31706723620535837, "grad_norm": 17.540843963623047, "learning_rate": 1.3805774278215225e-05, "loss": 1.6227, "step": 733 }, { "epoch": 0.31749979723701643, "grad_norm": 20.706098556518555, "learning_rate": 1.3797025371828523e-05, "loss": 1.5718, "step": 734 }, { "epoch": 0.3179323582686745, "grad_norm": 17.51397132873535, "learning_rate": 1.378827646544182e-05, "loss": 1.6043, "step": 735 }, { "epoch": 0.3183649193003325, "grad_norm": 20.64704132080078, "learning_rate": 1.377952755905512e-05, "loss": 1.5771, "step": 736 }, { "epoch": 0.31879748033199057, "grad_norm": 20.126808166503906, "learning_rate": 1.3770778652668417e-05, "loss": 1.7199, "step": 737 }, { "epoch": 0.31923004136364863, "grad_norm": 17.626785278320312, "learning_rate": 1.3762029746281716e-05, "loss": 1.6152, "step": 738 }, { "epoch": 0.3196626023953067, "grad_norm": 19.35613250732422, "learning_rate": 1.3753280839895014e-05, "loss": 1.5903, "step": 739 }, { "epoch": 0.32009516342696476, "grad_norm": 19.347549438476562, "learning_rate": 1.3744531933508311e-05, "loss": 1.5552, "step": 740 }, { "epoch": 0.3205277244586228, "grad_norm": 22.455907821655273, "learning_rate": 1.3735783027121612e-05, "loss": 1.6141, "step": 741 }, { "epoch": 0.3209602854902809, "grad_norm": 18.277362823486328, "learning_rate": 1.372703412073491e-05, "loss": 1.6331, "step": 742 }, { "epoch": 0.32139284652193895, "grad_norm": 18.168556213378906, "learning_rate": 1.3718285214348209e-05, "loss": 1.7211, "step": 743 }, { "epoch": 0.321825407553597, "grad_norm": 18.769269943237305, "learning_rate": 1.3709536307961506e-05, "loss": 1.5252, "step": 744 }, { "epoch": 0.3222579685852551, "grad_norm": 22.99976348876953, "learning_rate": 1.3700787401574804e-05, "loss": 1.6573, "step": 745 }, { "epoch": 0.32269052961691314, "grad_norm": 20.37944793701172, "learning_rate": 1.3692038495188103e-05, "loss": 1.6173, "step": 746 }, { "epoch": 0.3231230906485712, "grad_norm": 22.109603881835938, "learning_rate": 1.36832895888014e-05, "loss": 1.6456, "step": 747 }, { "epoch": 0.32355565168022926, "grad_norm": 17.85908317565918, "learning_rate": 1.3674540682414698e-05, "loss": 1.5592, "step": 748 }, { "epoch": 0.3239882127118873, "grad_norm": 17.462928771972656, "learning_rate": 1.3665791776027997e-05, "loss": 1.5589, "step": 749 }, { "epoch": 0.3244207737435454, "grad_norm": 17.60029411315918, "learning_rate": 1.3657042869641295e-05, "loss": 1.6156, "step": 750 }, { "epoch": 0.32485333477520345, "grad_norm": 18.957937240600586, "learning_rate": 1.3648293963254596e-05, "loss": 1.7443, "step": 751 }, { "epoch": 0.3252858958068615, "grad_norm": 22.84882926940918, "learning_rate": 1.3639545056867893e-05, "loss": 1.55, "step": 752 }, { "epoch": 0.3257184568385196, "grad_norm": 20.238740921020508, "learning_rate": 1.363079615048119e-05, "loss": 1.5943, "step": 753 }, { "epoch": 0.32615101787017764, "grad_norm": 17.576885223388672, "learning_rate": 1.362204724409449e-05, "loss": 1.6363, "step": 754 }, { "epoch": 0.3265835789018357, "grad_norm": 20.89413070678711, "learning_rate": 1.3613298337707787e-05, "loss": 1.6913, "step": 755 }, { "epoch": 0.32701613993349377, "grad_norm": 18.79582977294922, "learning_rate": 1.3604549431321085e-05, "loss": 1.6068, "step": 756 }, { "epoch": 0.32744870096515183, "grad_norm": 16.650836944580078, "learning_rate": 1.3595800524934384e-05, "loss": 1.634, "step": 757 }, { "epoch": 0.32788126199680984, "grad_norm": 17.64925765991211, "learning_rate": 1.3587051618547682e-05, "loss": 1.6472, "step": 758 }, { "epoch": 0.3283138230284679, "grad_norm": 20.90283966064453, "learning_rate": 1.357830271216098e-05, "loss": 1.5816, "step": 759 }, { "epoch": 0.32874638406012596, "grad_norm": 22.616561889648438, "learning_rate": 1.356955380577428e-05, "loss": 1.5406, "step": 760 }, { "epoch": 0.329178945091784, "grad_norm": 18.393531799316406, "learning_rate": 1.356080489938758e-05, "loss": 1.5632, "step": 761 }, { "epoch": 0.3296115061234421, "grad_norm": 17.92359733581543, "learning_rate": 1.3552055993000877e-05, "loss": 1.6438, "step": 762 }, { "epoch": 0.33004406715510015, "grad_norm": 18.8277530670166, "learning_rate": 1.3543307086614174e-05, "loss": 1.607, "step": 763 }, { "epoch": 0.3304766281867582, "grad_norm": 19.19496726989746, "learning_rate": 1.3534558180227474e-05, "loss": 1.6057, "step": 764 }, { "epoch": 0.3309091892184163, "grad_norm": 19.613494873046875, "learning_rate": 1.3525809273840771e-05, "loss": 1.6079, "step": 765 }, { "epoch": 0.33134175025007434, "grad_norm": 19.068227767944336, "learning_rate": 1.3517060367454069e-05, "loss": 1.5423, "step": 766 }, { "epoch": 0.3317743112817324, "grad_norm": 20.14394760131836, "learning_rate": 1.3508311461067368e-05, "loss": 1.643, "step": 767 }, { "epoch": 0.33220687231339047, "grad_norm": 20.151247024536133, "learning_rate": 1.3499562554680665e-05, "loss": 1.6013, "step": 768 }, { "epoch": 0.33263943334504853, "grad_norm": 19.042261123657227, "learning_rate": 1.3490813648293963e-05, "loss": 1.5702, "step": 769 }, { "epoch": 0.3330719943767066, "grad_norm": 18.928590774536133, "learning_rate": 1.3482064741907264e-05, "loss": 1.5902, "step": 770 }, { "epoch": 0.33350455540836466, "grad_norm": 20.279457092285156, "learning_rate": 1.3473315835520561e-05, "loss": 1.6624, "step": 771 }, { "epoch": 0.3339371164400227, "grad_norm": 19.55834197998047, "learning_rate": 1.346456692913386e-05, "loss": 1.5899, "step": 772 }, { "epoch": 0.3343696774716808, "grad_norm": 19.354820251464844, "learning_rate": 1.3455818022747158e-05, "loss": 1.586, "step": 773 }, { "epoch": 0.33480223850333884, "grad_norm": 21.938444137573242, "learning_rate": 1.3447069116360455e-05, "loss": 1.6159, "step": 774 }, { "epoch": 0.3352347995349969, "grad_norm": 17.96108055114746, "learning_rate": 1.3438320209973755e-05, "loss": 1.6662, "step": 775 }, { "epoch": 0.33566736056665497, "grad_norm": 18.945526123046875, "learning_rate": 1.3429571303587052e-05, "loss": 1.5927, "step": 776 }, { "epoch": 0.33609992159831303, "grad_norm": 19.803407669067383, "learning_rate": 1.342082239720035e-05, "loss": 1.6594, "step": 777 }, { "epoch": 0.3365324826299711, "grad_norm": 18.89844512939453, "learning_rate": 1.3412073490813649e-05, "loss": 1.6048, "step": 778 }, { "epoch": 0.33696504366162916, "grad_norm": 20.112564086914062, "learning_rate": 1.3403324584426948e-05, "loss": 1.6187, "step": 779 }, { "epoch": 0.33739760469328717, "grad_norm": 17.415624618530273, "learning_rate": 1.3394575678040247e-05, "loss": 1.5409, "step": 780 }, { "epoch": 0.33783016572494523, "grad_norm": 16.357385635375977, "learning_rate": 1.3385826771653545e-05, "loss": 1.6128, "step": 781 }, { "epoch": 0.3382627267566033, "grad_norm": 18.058176040649414, "learning_rate": 1.3377077865266842e-05, "loss": 1.5935, "step": 782 }, { "epoch": 0.33869528778826136, "grad_norm": 19.19011688232422, "learning_rate": 1.3368328958880142e-05, "loss": 1.5617, "step": 783 }, { "epoch": 0.3391278488199194, "grad_norm": 19.17144203186035, "learning_rate": 1.3359580052493439e-05, "loss": 1.6607, "step": 784 }, { "epoch": 0.3395604098515775, "grad_norm": 18.61704444885254, "learning_rate": 1.3350831146106738e-05, "loss": 1.612, "step": 785 }, { "epoch": 0.33999297088323555, "grad_norm": 20.21958351135254, "learning_rate": 1.3342082239720036e-05, "loss": 1.5809, "step": 786 }, { "epoch": 0.3404255319148936, "grad_norm": 19.189098358154297, "learning_rate": 1.3333333333333333e-05, "loss": 1.5807, "step": 787 }, { "epoch": 0.34085809294655167, "grad_norm": 22.04606819152832, "learning_rate": 1.3324584426946633e-05, "loss": 1.6273, "step": 788 }, { "epoch": 0.34129065397820973, "grad_norm": 19.73443031311035, "learning_rate": 1.3315835520559932e-05, "loss": 1.6006, "step": 789 }, { "epoch": 0.3417232150098678, "grad_norm": 20.105873107910156, "learning_rate": 1.3307086614173231e-05, "loss": 1.573, "step": 790 }, { "epoch": 0.34215577604152586, "grad_norm": 20.053709030151367, "learning_rate": 1.3298337707786528e-05, "loss": 1.5847, "step": 791 }, { "epoch": 0.3425883370731839, "grad_norm": 17.113309860229492, "learning_rate": 1.3289588801399826e-05, "loss": 1.5184, "step": 792 }, { "epoch": 0.343020898104842, "grad_norm": 17.02407455444336, "learning_rate": 1.3280839895013125e-05, "loss": 1.5491, "step": 793 }, { "epoch": 0.34345345913650005, "grad_norm": 19.07799530029297, "learning_rate": 1.3272090988626423e-05, "loss": 1.6541, "step": 794 }, { "epoch": 0.3438860201681581, "grad_norm": 19.018844604492188, "learning_rate": 1.326334208223972e-05, "loss": 1.5478, "step": 795 }, { "epoch": 0.3443185811998162, "grad_norm": 18.546709060668945, "learning_rate": 1.325459317585302e-05, "loss": 1.6061, "step": 796 }, { "epoch": 0.34475114223147424, "grad_norm": 20.176528930664062, "learning_rate": 1.3245844269466317e-05, "loss": 1.7127, "step": 797 }, { "epoch": 0.3451837032631323, "grad_norm": 20.838176727294922, "learning_rate": 1.3237095363079614e-05, "loss": 1.6132, "step": 798 }, { "epoch": 0.34561626429479037, "grad_norm": 17.911251068115234, "learning_rate": 1.3228346456692915e-05, "loss": 1.659, "step": 799 }, { "epoch": 0.34604882532644843, "grad_norm": 20.308305740356445, "learning_rate": 1.3219597550306213e-05, "loss": 1.5575, "step": 800 }, { "epoch": 0.3464813863581065, "grad_norm": 18.12563133239746, "learning_rate": 1.3210848643919512e-05, "loss": 1.7155, "step": 801 }, { "epoch": 0.3469139473897645, "grad_norm": 18.247697830200195, "learning_rate": 1.320209973753281e-05, "loss": 1.6309, "step": 802 }, { "epoch": 0.34734650842142256, "grad_norm": 20.070714950561523, "learning_rate": 1.3193350831146107e-05, "loss": 1.5833, "step": 803 }, { "epoch": 0.3477790694530806, "grad_norm": 22.178564071655273, "learning_rate": 1.3184601924759406e-05, "loss": 1.6604, "step": 804 }, { "epoch": 0.3482116304847387, "grad_norm": 19.786285400390625, "learning_rate": 1.3175853018372704e-05, "loss": 1.5634, "step": 805 }, { "epoch": 0.34864419151639675, "grad_norm": 18.314327239990234, "learning_rate": 1.3167104111986003e-05, "loss": 1.657, "step": 806 }, { "epoch": 0.3490767525480548, "grad_norm": 20.814380645751953, "learning_rate": 1.31583552055993e-05, "loss": 1.6143, "step": 807 }, { "epoch": 0.3495093135797129, "grad_norm": 19.84009552001953, "learning_rate": 1.3149606299212601e-05, "loss": 1.6296, "step": 808 }, { "epoch": 0.34994187461137094, "grad_norm": 18.69206428527832, "learning_rate": 1.3140857392825899e-05, "loss": 1.6647, "step": 809 }, { "epoch": 0.350374435643029, "grad_norm": 19.76461410522461, "learning_rate": 1.3132108486439196e-05, "loss": 1.5763, "step": 810 }, { "epoch": 0.35080699667468707, "grad_norm": 19.293476104736328, "learning_rate": 1.3123359580052496e-05, "loss": 1.5856, "step": 811 }, { "epoch": 0.35123955770634513, "grad_norm": 20.5203914642334, "learning_rate": 1.3114610673665793e-05, "loss": 1.5266, "step": 812 }, { "epoch": 0.3516721187380032, "grad_norm": 20.248397827148438, "learning_rate": 1.310586176727909e-05, "loss": 1.6702, "step": 813 }, { "epoch": 0.35210467976966126, "grad_norm": 16.614885330200195, "learning_rate": 1.309711286089239e-05, "loss": 1.6621, "step": 814 }, { "epoch": 0.3525372408013193, "grad_norm": 16.071029663085938, "learning_rate": 1.3088363954505687e-05, "loss": 1.6654, "step": 815 }, { "epoch": 0.3529698018329774, "grad_norm": 18.72723960876465, "learning_rate": 1.3079615048118985e-05, "loss": 1.6519, "step": 816 }, { "epoch": 0.35340236286463544, "grad_norm": 18.771820068359375, "learning_rate": 1.3070866141732284e-05, "loss": 1.688, "step": 817 }, { "epoch": 0.3538349238962935, "grad_norm": 17.976329803466797, "learning_rate": 1.3062117235345583e-05, "loss": 1.6781, "step": 818 }, { "epoch": 0.35426748492795157, "grad_norm": 19.907297134399414, "learning_rate": 1.3053368328958883e-05, "loss": 1.623, "step": 819 }, { "epoch": 0.35470004595960963, "grad_norm": 19.37649154663086, "learning_rate": 1.304461942257218e-05, "loss": 1.6589, "step": 820 }, { "epoch": 0.3551326069912677, "grad_norm": 21.129186630249023, "learning_rate": 1.3035870516185478e-05, "loss": 1.5147, "step": 821 }, { "epoch": 0.35556516802292576, "grad_norm": 20.957626342773438, "learning_rate": 1.3027121609798777e-05, "loss": 1.6448, "step": 822 }, { "epoch": 0.3559977290545838, "grad_norm": 21.617694854736328, "learning_rate": 1.3018372703412074e-05, "loss": 1.5604, "step": 823 }, { "epoch": 0.35643029008624183, "grad_norm": 21.156492233276367, "learning_rate": 1.3009623797025372e-05, "loss": 1.6346, "step": 824 }, { "epoch": 0.3568628511178999, "grad_norm": 20.160831451416016, "learning_rate": 1.3000874890638671e-05, "loss": 1.5343, "step": 825 }, { "epoch": 0.35729541214955796, "grad_norm": 21.976581573486328, "learning_rate": 1.2992125984251968e-05, "loss": 1.615, "step": 826 }, { "epoch": 0.357727973181216, "grad_norm": 21.678958892822266, "learning_rate": 1.2983377077865266e-05, "loss": 1.5747, "step": 827 }, { "epoch": 0.3581605342128741, "grad_norm": 22.03537940979004, "learning_rate": 1.2974628171478567e-05, "loss": 1.5686, "step": 828 }, { "epoch": 0.35859309524453215, "grad_norm": 18.182485580444336, "learning_rate": 1.2965879265091864e-05, "loss": 1.6549, "step": 829 }, { "epoch": 0.3590256562761902, "grad_norm": 18.341358184814453, "learning_rate": 1.2957130358705164e-05, "loss": 1.7054, "step": 830 }, { "epoch": 0.35945821730784827, "grad_norm": 20.54411506652832, "learning_rate": 1.2948381452318461e-05, "loss": 1.6214, "step": 831 }, { "epoch": 0.35989077833950633, "grad_norm": 17.28512954711914, "learning_rate": 1.293963254593176e-05, "loss": 1.6161, "step": 832 }, { "epoch": 0.3603233393711644, "grad_norm": 20.21026039123535, "learning_rate": 1.2930883639545058e-05, "loss": 1.5949, "step": 833 }, { "epoch": 0.36075590040282246, "grad_norm": 20.83098030090332, "learning_rate": 1.2922134733158355e-05, "loss": 1.556, "step": 834 }, { "epoch": 0.3611884614344805, "grad_norm": 19.548084259033203, "learning_rate": 1.2913385826771655e-05, "loss": 1.6087, "step": 835 }, { "epoch": 0.3616210224661386, "grad_norm": 19.833934783935547, "learning_rate": 1.2904636920384952e-05, "loss": 1.6511, "step": 836 }, { "epoch": 0.36205358349779665, "grad_norm": 24.85418701171875, "learning_rate": 1.2895888013998253e-05, "loss": 1.6686, "step": 837 }, { "epoch": 0.3624861445294547, "grad_norm": 20.29999542236328, "learning_rate": 1.288713910761155e-05, "loss": 1.6503, "step": 838 }, { "epoch": 0.3629187055611128, "grad_norm": 18.538761138916016, "learning_rate": 1.2878390201224848e-05, "loss": 1.5934, "step": 839 }, { "epoch": 0.36335126659277084, "grad_norm": 20.594356536865234, "learning_rate": 1.2869641294838147e-05, "loss": 1.6694, "step": 840 }, { "epoch": 0.3637838276244289, "grad_norm": 18.274131774902344, "learning_rate": 1.2860892388451445e-05, "loss": 1.6111, "step": 841 }, { "epoch": 0.36421638865608696, "grad_norm": 19.532379150390625, "learning_rate": 1.2852143482064742e-05, "loss": 1.5865, "step": 842 }, { "epoch": 0.364648949687745, "grad_norm": 19.116188049316406, "learning_rate": 1.2843394575678041e-05, "loss": 1.5406, "step": 843 }, { "epoch": 0.3650815107194031, "grad_norm": 21.525840759277344, "learning_rate": 1.2834645669291339e-05, "loss": 1.5579, "step": 844 }, { "epoch": 0.36551407175106115, "grad_norm": 19.764007568359375, "learning_rate": 1.2825896762904636e-05, "loss": 1.5497, "step": 845 }, { "epoch": 0.36594663278271916, "grad_norm": 20.76372528076172, "learning_rate": 1.2817147856517936e-05, "loss": 1.5076, "step": 846 }, { "epoch": 0.3663791938143772, "grad_norm": 26.738079071044922, "learning_rate": 1.2808398950131235e-05, "loss": 1.5783, "step": 847 }, { "epoch": 0.3668117548460353, "grad_norm": 21.190170288085938, "learning_rate": 1.2799650043744534e-05, "loss": 1.4697, "step": 848 }, { "epoch": 0.36724431587769335, "grad_norm": 21.188650131225586, "learning_rate": 1.2790901137357832e-05, "loss": 1.6, "step": 849 }, { "epoch": 0.3676768769093514, "grad_norm": 20.70604705810547, "learning_rate": 1.2782152230971129e-05, "loss": 1.6808, "step": 850 }, { "epoch": 0.3681094379410095, "grad_norm": 20.689743041992188, "learning_rate": 1.2773403324584428e-05, "loss": 1.5621, "step": 851 }, { "epoch": 0.36854199897266754, "grad_norm": 20.194637298583984, "learning_rate": 1.2764654418197726e-05, "loss": 1.6808, "step": 852 }, { "epoch": 0.3689745600043256, "grad_norm": 19.224369049072266, "learning_rate": 1.2755905511811025e-05, "loss": 1.6184, "step": 853 }, { "epoch": 0.36940712103598367, "grad_norm": 19.203989028930664, "learning_rate": 1.2747156605424323e-05, "loss": 1.5306, "step": 854 }, { "epoch": 0.36983968206764173, "grad_norm": 20.381193161010742, "learning_rate": 1.273840769903762e-05, "loss": 1.6061, "step": 855 }, { "epoch": 0.3702722430992998, "grad_norm": 19.450544357299805, "learning_rate": 1.2729658792650921e-05, "loss": 1.6247, "step": 856 }, { "epoch": 0.37070480413095785, "grad_norm": 21.83024787902832, "learning_rate": 1.2720909886264218e-05, "loss": 1.5597, "step": 857 }, { "epoch": 0.3711373651626159, "grad_norm": 21.240888595581055, "learning_rate": 1.2712160979877518e-05, "loss": 1.604, "step": 858 }, { "epoch": 0.371569926194274, "grad_norm": 21.858312606811523, "learning_rate": 1.2703412073490815e-05, "loss": 1.5974, "step": 859 }, { "epoch": 0.37200248722593204, "grad_norm": 19.26487159729004, "learning_rate": 1.2694663167104113e-05, "loss": 1.7254, "step": 860 }, { "epoch": 0.3724350482575901, "grad_norm": 22.703330993652344, "learning_rate": 1.2685914260717412e-05, "loss": 1.6511, "step": 861 }, { "epoch": 0.37286760928924817, "grad_norm": 20.9957218170166, "learning_rate": 1.267716535433071e-05, "loss": 1.5367, "step": 862 }, { "epoch": 0.37330017032090623, "grad_norm": 19.078935623168945, "learning_rate": 1.2668416447944007e-05, "loss": 1.5111, "step": 863 }, { "epoch": 0.3737327313525643, "grad_norm": 20.05775260925293, "learning_rate": 1.2659667541557306e-05, "loss": 1.5996, "step": 864 }, { "epoch": 0.37416529238422236, "grad_norm": 18.394546508789062, "learning_rate": 1.2650918635170604e-05, "loss": 1.6372, "step": 865 }, { "epoch": 0.3745978534158804, "grad_norm": 19.035602569580078, "learning_rate": 1.2642169728783905e-05, "loss": 1.6267, "step": 866 }, { "epoch": 0.3750304144475385, "grad_norm": 19.66758918762207, "learning_rate": 1.2633420822397202e-05, "loss": 1.5181, "step": 867 }, { "epoch": 0.3754629754791965, "grad_norm": 19.294878005981445, "learning_rate": 1.26246719160105e-05, "loss": 1.6038, "step": 868 }, { "epoch": 0.37589553651085456, "grad_norm": 21.874311447143555, "learning_rate": 1.2615923009623799e-05, "loss": 1.5302, "step": 869 }, { "epoch": 0.3763280975425126, "grad_norm": 20.003787994384766, "learning_rate": 1.2607174103237096e-05, "loss": 1.5961, "step": 870 }, { "epoch": 0.3767606585741707, "grad_norm": 19.584362030029297, "learning_rate": 1.2598425196850394e-05, "loss": 1.6231, "step": 871 }, { "epoch": 0.37719321960582874, "grad_norm": 18.144039154052734, "learning_rate": 1.2589676290463693e-05, "loss": 1.4876, "step": 872 }, { "epoch": 0.3776257806374868, "grad_norm": 20.27103042602539, "learning_rate": 1.258092738407699e-05, "loss": 1.5783, "step": 873 }, { "epoch": 0.37805834166914487, "grad_norm": 24.131301879882812, "learning_rate": 1.2572178477690288e-05, "loss": 1.6346, "step": 874 }, { "epoch": 0.37849090270080293, "grad_norm": 25.75758934020996, "learning_rate": 1.2563429571303587e-05, "loss": 1.7327, "step": 875 }, { "epoch": 0.378923463732461, "grad_norm": 22.106565475463867, "learning_rate": 1.2554680664916886e-05, "loss": 1.618, "step": 876 }, { "epoch": 0.37935602476411906, "grad_norm": 19.74208641052246, "learning_rate": 1.2545931758530186e-05, "loss": 1.5391, "step": 877 }, { "epoch": 0.3797885857957771, "grad_norm": 22.241756439208984, "learning_rate": 1.2537182852143483e-05, "loss": 1.597, "step": 878 }, { "epoch": 0.3802211468274352, "grad_norm": 18.778261184692383, "learning_rate": 1.2528433945756782e-05, "loss": 1.5784, "step": 879 }, { "epoch": 0.38065370785909325, "grad_norm": 20.45719337463379, "learning_rate": 1.251968503937008e-05, "loss": 1.5788, "step": 880 }, { "epoch": 0.3810862688907513, "grad_norm": 23.036163330078125, "learning_rate": 1.2510936132983377e-05, "loss": 1.7009, "step": 881 }, { "epoch": 0.3815188299224094, "grad_norm": 20.001171112060547, "learning_rate": 1.2502187226596677e-05, "loss": 1.4824, "step": 882 }, { "epoch": 0.38195139095406744, "grad_norm": 19.137182235717773, "learning_rate": 1.2493438320209974e-05, "loss": 1.5866, "step": 883 }, { "epoch": 0.3823839519857255, "grad_norm": 21.097896575927734, "learning_rate": 1.2484689413823272e-05, "loss": 1.5977, "step": 884 }, { "epoch": 0.38281651301738356, "grad_norm": 20.685922622680664, "learning_rate": 1.2475940507436573e-05, "loss": 1.6055, "step": 885 }, { "epoch": 0.3832490740490416, "grad_norm": 19.775070190429688, "learning_rate": 1.246719160104987e-05, "loss": 1.6436, "step": 886 }, { "epoch": 0.3836816350806997, "grad_norm": 20.196359634399414, "learning_rate": 1.245844269466317e-05, "loss": 1.5265, "step": 887 }, { "epoch": 0.38411419611235775, "grad_norm": 19.682497024536133, "learning_rate": 1.2449693788276467e-05, "loss": 1.5624, "step": 888 }, { "epoch": 0.3845467571440158, "grad_norm": 20.493722915649414, "learning_rate": 1.2440944881889764e-05, "loss": 1.6223, "step": 889 }, { "epoch": 0.3849793181756738, "grad_norm": 19.67377281188965, "learning_rate": 1.2432195975503064e-05, "loss": 1.6164, "step": 890 }, { "epoch": 0.3854118792073319, "grad_norm": 18.345895767211914, "learning_rate": 1.2423447069116361e-05, "loss": 1.6155, "step": 891 }, { "epoch": 0.38584444023898995, "grad_norm": 18.30042266845703, "learning_rate": 1.2414698162729659e-05, "loss": 1.5257, "step": 892 }, { "epoch": 0.386277001270648, "grad_norm": 19.78774070739746, "learning_rate": 1.2405949256342958e-05, "loss": 1.716, "step": 893 }, { "epoch": 0.3867095623023061, "grad_norm": 21.33734130859375, "learning_rate": 1.2397200349956255e-05, "loss": 1.5729, "step": 894 }, { "epoch": 0.38714212333396414, "grad_norm": 18.871843338012695, "learning_rate": 1.2388451443569556e-05, "loss": 1.6478, "step": 895 }, { "epoch": 0.3875746843656222, "grad_norm": 20.553613662719727, "learning_rate": 1.2379702537182854e-05, "loss": 1.6663, "step": 896 }, { "epoch": 0.38800724539728026, "grad_norm": 19.660924911499023, "learning_rate": 1.2370953630796151e-05, "loss": 1.6294, "step": 897 }, { "epoch": 0.38843980642893833, "grad_norm": 17.97296714782715, "learning_rate": 1.236220472440945e-05, "loss": 1.5321, "step": 898 }, { "epoch": 0.3888723674605964, "grad_norm": 20.643564224243164, "learning_rate": 1.2353455818022748e-05, "loss": 1.5242, "step": 899 }, { "epoch": 0.38930492849225445, "grad_norm": 19.187049865722656, "learning_rate": 1.2344706911636047e-05, "loss": 1.5819, "step": 900 }, { "epoch": 0.3897374895239125, "grad_norm": 18.788549423217773, "learning_rate": 1.2335958005249345e-05, "loss": 1.6182, "step": 901 }, { "epoch": 0.3901700505555706, "grad_norm": 20.54323959350586, "learning_rate": 1.2327209098862642e-05, "loss": 1.6235, "step": 902 }, { "epoch": 0.39060261158722864, "grad_norm": 20.15620231628418, "learning_rate": 1.2318460192475941e-05, "loss": 1.5536, "step": 903 }, { "epoch": 0.3910351726188867, "grad_norm": 18.571413040161133, "learning_rate": 1.230971128608924e-05, "loss": 1.6502, "step": 904 }, { "epoch": 0.39146773365054477, "grad_norm": 19.422834396362305, "learning_rate": 1.230096237970254e-05, "loss": 1.5417, "step": 905 }, { "epoch": 0.39190029468220283, "grad_norm": 20.704635620117188, "learning_rate": 1.2292213473315837e-05, "loss": 1.6313, "step": 906 }, { "epoch": 0.3923328557138609, "grad_norm": 17.548553466796875, "learning_rate": 1.2283464566929135e-05, "loss": 1.6678, "step": 907 }, { "epoch": 0.39276541674551896, "grad_norm": 19.540618896484375, "learning_rate": 1.2274715660542434e-05, "loss": 1.5068, "step": 908 }, { "epoch": 0.393197977777177, "grad_norm": 18.6761531829834, "learning_rate": 1.2265966754155732e-05, "loss": 1.5907, "step": 909 }, { "epoch": 0.3936305388088351, "grad_norm": 19.963176727294922, "learning_rate": 1.2257217847769029e-05, "loss": 1.5861, "step": 910 }, { "epoch": 0.39406309984049315, "grad_norm": 20.782676696777344, "learning_rate": 1.2248468941382328e-05, "loss": 1.5452, "step": 911 }, { "epoch": 0.39449566087215115, "grad_norm": 20.450653076171875, "learning_rate": 1.2239720034995626e-05, "loss": 1.5404, "step": 912 }, { "epoch": 0.3949282219038092, "grad_norm": 18.47747230529785, "learning_rate": 1.2230971128608923e-05, "loss": 1.6799, "step": 913 }, { "epoch": 0.3953607829354673, "grad_norm": 21.109474182128906, "learning_rate": 1.2222222222222224e-05, "loss": 1.5815, "step": 914 }, { "epoch": 0.39579334396712534, "grad_norm": 20.80393409729004, "learning_rate": 1.2213473315835522e-05, "loss": 1.6559, "step": 915 }, { "epoch": 0.3962259049987834, "grad_norm": 21.14255714416504, "learning_rate": 1.2204724409448821e-05, "loss": 1.6067, "step": 916 }, { "epoch": 0.39665846603044147, "grad_norm": 18.84864616394043, "learning_rate": 1.2195975503062118e-05, "loss": 1.5728, "step": 917 }, { "epoch": 0.39709102706209953, "grad_norm": 21.547277450561523, "learning_rate": 1.2187226596675416e-05, "loss": 1.5919, "step": 918 }, { "epoch": 0.3975235880937576, "grad_norm": 20.30232048034668, "learning_rate": 1.2178477690288715e-05, "loss": 1.5967, "step": 919 }, { "epoch": 0.39795614912541566, "grad_norm": 19.81118392944336, "learning_rate": 1.2169728783902013e-05, "loss": 1.5498, "step": 920 }, { "epoch": 0.3983887101570737, "grad_norm": 20.070619583129883, "learning_rate": 1.216097987751531e-05, "loss": 1.5291, "step": 921 }, { "epoch": 0.3988212711887318, "grad_norm": 18.752845764160156, "learning_rate": 1.215223097112861e-05, "loss": 1.595, "step": 922 }, { "epoch": 0.39925383222038985, "grad_norm": 20.85921287536621, "learning_rate": 1.2143482064741907e-05, "loss": 1.4615, "step": 923 }, { "epoch": 0.3996863932520479, "grad_norm": 20.928272247314453, "learning_rate": 1.2134733158355208e-05, "loss": 1.5868, "step": 924 }, { "epoch": 0.400118954283706, "grad_norm": 21.330936431884766, "learning_rate": 1.2125984251968505e-05, "loss": 1.5825, "step": 925 }, { "epoch": 0.40055151531536404, "grad_norm": 19.58514404296875, "learning_rate": 1.2117235345581804e-05, "loss": 1.6101, "step": 926 }, { "epoch": 0.4009840763470221, "grad_norm": 19.794700622558594, "learning_rate": 1.2108486439195102e-05, "loss": 1.5484, "step": 927 }, { "epoch": 0.40141663737868016, "grad_norm": 20.019779205322266, "learning_rate": 1.20997375328084e-05, "loss": 1.6467, "step": 928 }, { "epoch": 0.4018491984103382, "grad_norm": 18.108291625976562, "learning_rate": 1.2090988626421699e-05, "loss": 1.568, "step": 929 }, { "epoch": 0.4022817594419963, "grad_norm": 20.311721801757812, "learning_rate": 1.2082239720034996e-05, "loss": 1.4256, "step": 930 }, { "epoch": 0.40271432047365435, "grad_norm": 20.440967559814453, "learning_rate": 1.2073490813648294e-05, "loss": 1.5777, "step": 931 }, { "epoch": 0.4031468815053124, "grad_norm": 20.755502700805664, "learning_rate": 1.2064741907261593e-05, "loss": 1.5757, "step": 932 }, { "epoch": 0.4035794425369704, "grad_norm": 21.64055633544922, "learning_rate": 1.2055993000874892e-05, "loss": 1.6168, "step": 933 }, { "epoch": 0.4040120035686285, "grad_norm": 19.491615295410156, "learning_rate": 1.2047244094488191e-05, "loss": 1.5655, "step": 934 }, { "epoch": 0.40444456460028655, "grad_norm": 19.454198837280273, "learning_rate": 1.2038495188101489e-05, "loss": 1.5896, "step": 935 }, { "epoch": 0.4048771256319446, "grad_norm": 20.64267349243164, "learning_rate": 1.2029746281714786e-05, "loss": 1.5161, "step": 936 }, { "epoch": 0.4053096866636027, "grad_norm": 23.738571166992188, "learning_rate": 1.2020997375328086e-05, "loss": 1.7056, "step": 937 }, { "epoch": 0.40574224769526074, "grad_norm": 22.766353607177734, "learning_rate": 1.2012248468941383e-05, "loss": 1.5408, "step": 938 }, { "epoch": 0.4061748087269188, "grad_norm": 21.712682723999023, "learning_rate": 1.200349956255468e-05, "loss": 1.5679, "step": 939 }, { "epoch": 0.40660736975857686, "grad_norm": 20.704023361206055, "learning_rate": 1.199475065616798e-05, "loss": 1.5291, "step": 940 }, { "epoch": 0.4070399307902349, "grad_norm": 21.490806579589844, "learning_rate": 1.1986001749781277e-05, "loss": 1.5927, "step": 941 }, { "epoch": 0.407472491821893, "grad_norm": 18.206605911254883, "learning_rate": 1.1977252843394575e-05, "loss": 1.5576, "step": 942 }, { "epoch": 0.40790505285355105, "grad_norm": 19.448074340820312, "learning_rate": 1.1968503937007876e-05, "loss": 1.5126, "step": 943 }, { "epoch": 0.4083376138852091, "grad_norm": 19.33980369567871, "learning_rate": 1.1959755030621173e-05, "loss": 1.5412, "step": 944 }, { "epoch": 0.4087701749168672, "grad_norm": 20.316843032836914, "learning_rate": 1.1951006124234472e-05, "loss": 1.697, "step": 945 }, { "epoch": 0.40920273594852524, "grad_norm": 19.3831787109375, "learning_rate": 1.194225721784777e-05, "loss": 1.5501, "step": 946 }, { "epoch": 0.4096352969801833, "grad_norm": 23.11265754699707, "learning_rate": 1.193350831146107e-05, "loss": 1.6124, "step": 947 }, { "epoch": 0.41006785801184137, "grad_norm": 23.18746566772461, "learning_rate": 1.1924759405074367e-05, "loss": 1.61, "step": 948 }, { "epoch": 0.41050041904349943, "grad_norm": 20.464345932006836, "learning_rate": 1.1916010498687664e-05, "loss": 1.6349, "step": 949 }, { "epoch": 0.4109329800751575, "grad_norm": 18.070871353149414, "learning_rate": 1.1907261592300963e-05, "loss": 1.6273, "step": 950 }, { "epoch": 0.41136554110681556, "grad_norm": 19.049097061157227, "learning_rate": 1.1898512685914261e-05, "loss": 1.4902, "step": 951 }, { "epoch": 0.4117981021384736, "grad_norm": 20.535675048828125, "learning_rate": 1.1889763779527562e-05, "loss": 1.5897, "step": 952 }, { "epoch": 0.4122306631701317, "grad_norm": 19.585886001586914, "learning_rate": 1.188101487314086e-05, "loss": 1.6457, "step": 953 }, { "epoch": 0.41266322420178975, "grad_norm": 19.01354217529297, "learning_rate": 1.1872265966754157e-05, "loss": 1.6315, "step": 954 }, { "epoch": 0.41309578523344775, "grad_norm": 21.235271453857422, "learning_rate": 1.1863517060367456e-05, "loss": 1.4923, "step": 955 }, { "epoch": 0.4135283462651058, "grad_norm": 19.011507034301758, "learning_rate": 1.1854768153980754e-05, "loss": 1.5508, "step": 956 }, { "epoch": 0.4139609072967639, "grad_norm": 20.91914176940918, "learning_rate": 1.1846019247594051e-05, "loss": 1.5762, "step": 957 }, { "epoch": 0.41439346832842194, "grad_norm": 23.34760284423828, "learning_rate": 1.183727034120735e-05, "loss": 1.5603, "step": 958 }, { "epoch": 0.41482602936008, "grad_norm": 23.663442611694336, "learning_rate": 1.1828521434820648e-05, "loss": 1.607, "step": 959 }, { "epoch": 0.41525859039173807, "grad_norm": 18.84950065612793, "learning_rate": 1.1819772528433945e-05, "loss": 1.5646, "step": 960 }, { "epoch": 0.41569115142339613, "grad_norm": 20.305644989013672, "learning_rate": 1.1811023622047245e-05, "loss": 1.5428, "step": 961 }, { "epoch": 0.4161237124550542, "grad_norm": 19.10517692565918, "learning_rate": 1.1802274715660544e-05, "loss": 1.5305, "step": 962 }, { "epoch": 0.41655627348671226, "grad_norm": 22.100454330444336, "learning_rate": 1.1793525809273843e-05, "loss": 1.5978, "step": 963 }, { "epoch": 0.4169888345183703, "grad_norm": 19.938411712646484, "learning_rate": 1.178477690288714e-05, "loss": 1.5133, "step": 964 }, { "epoch": 0.4174213955500284, "grad_norm": 20.516897201538086, "learning_rate": 1.1776027996500438e-05, "loss": 1.6113, "step": 965 }, { "epoch": 0.41785395658168645, "grad_norm": 19.226377487182617, "learning_rate": 1.1767279090113737e-05, "loss": 1.5843, "step": 966 }, { "epoch": 0.4182865176133445, "grad_norm": 25.08070182800293, "learning_rate": 1.1758530183727035e-05, "loss": 1.6075, "step": 967 }, { "epoch": 0.4187190786450026, "grad_norm": 20.30937385559082, "learning_rate": 1.1749781277340332e-05, "loss": 1.5732, "step": 968 }, { "epoch": 0.41915163967666064, "grad_norm": 20.013835906982422, "learning_rate": 1.1741032370953631e-05, "loss": 1.6688, "step": 969 }, { "epoch": 0.4195842007083187, "grad_norm": 21.13936424255371, "learning_rate": 1.1732283464566929e-05, "loss": 1.5732, "step": 970 }, { "epoch": 0.42001676173997676, "grad_norm": 20.149782180786133, "learning_rate": 1.1723534558180228e-05, "loss": 1.5247, "step": 971 }, { "epoch": 0.4204493227716348, "grad_norm": 22.26946449279785, "learning_rate": 1.1714785651793527e-05, "loss": 1.5106, "step": 972 }, { "epoch": 0.4208818838032929, "grad_norm": 20.233417510986328, "learning_rate": 1.1706036745406827e-05, "loss": 1.5702, "step": 973 }, { "epoch": 0.42131444483495095, "grad_norm": 20.490550994873047, "learning_rate": 1.1697287839020124e-05, "loss": 1.6082, "step": 974 }, { "epoch": 0.421747005866609, "grad_norm": 19.094743728637695, "learning_rate": 1.1688538932633422e-05, "loss": 1.5653, "step": 975 }, { "epoch": 0.4221795668982671, "grad_norm": 18.951610565185547, "learning_rate": 1.167979002624672e-05, "loss": 1.5294, "step": 976 }, { "epoch": 0.4226121279299251, "grad_norm": 19.141788482666016, "learning_rate": 1.1671041119860018e-05, "loss": 1.5549, "step": 977 }, { "epoch": 0.42304468896158315, "grad_norm": 22.37894630432129, "learning_rate": 1.1662292213473316e-05, "loss": 1.5739, "step": 978 }, { "epoch": 0.4234772499932412, "grad_norm": 21.99265480041504, "learning_rate": 1.1653543307086615e-05, "loss": 1.5732, "step": 979 }, { "epoch": 0.4239098110248993, "grad_norm": 21.509435653686523, "learning_rate": 1.1644794400699913e-05, "loss": 1.6226, "step": 980 }, { "epoch": 0.42434237205655734, "grad_norm": 21.833322525024414, "learning_rate": 1.1636045494313213e-05, "loss": 1.561, "step": 981 }, { "epoch": 0.4247749330882154, "grad_norm": 20.158653259277344, "learning_rate": 1.1627296587926511e-05, "loss": 1.5053, "step": 982 }, { "epoch": 0.42520749411987346, "grad_norm": 23.664594650268555, "learning_rate": 1.1618547681539808e-05, "loss": 1.6409, "step": 983 }, { "epoch": 0.4256400551515315, "grad_norm": 20.181825637817383, "learning_rate": 1.1609798775153108e-05, "loss": 1.615, "step": 984 }, { "epoch": 0.4260726161831896, "grad_norm": 23.865310668945312, "learning_rate": 1.1601049868766405e-05, "loss": 1.6681, "step": 985 }, { "epoch": 0.42650517721484765, "grad_norm": 22.003345489501953, "learning_rate": 1.1592300962379703e-05, "loss": 1.594, "step": 986 }, { "epoch": 0.4269377382465057, "grad_norm": 21.85173797607422, "learning_rate": 1.1583552055993002e-05, "loss": 1.5251, "step": 987 }, { "epoch": 0.4273702992781638, "grad_norm": 22.519914627075195, "learning_rate": 1.15748031496063e-05, "loss": 1.6031, "step": 988 }, { "epoch": 0.42780286030982184, "grad_norm": 18.701091766357422, "learning_rate": 1.1566054243219597e-05, "loss": 1.5812, "step": 989 }, { "epoch": 0.4282354213414799, "grad_norm": 18.49114418029785, "learning_rate": 1.1557305336832896e-05, "loss": 1.573, "step": 990 }, { "epoch": 0.42866798237313797, "grad_norm": 21.027111053466797, "learning_rate": 1.1548556430446195e-05, "loss": 1.5618, "step": 991 }, { "epoch": 0.42910054340479603, "grad_norm": 22.11174964904785, "learning_rate": 1.1539807524059495e-05, "loss": 1.5999, "step": 992 }, { "epoch": 0.4295331044364541, "grad_norm": 23.42853355407715, "learning_rate": 1.1531058617672792e-05, "loss": 1.5065, "step": 993 }, { "epoch": 0.42996566546811216, "grad_norm": 20.36092185974121, "learning_rate": 1.1522309711286091e-05, "loss": 1.5454, "step": 994 }, { "epoch": 0.4303982264997702, "grad_norm": 20.617124557495117, "learning_rate": 1.1513560804899389e-05, "loss": 1.6106, "step": 995 }, { "epoch": 0.4308307875314283, "grad_norm": 21.008230209350586, "learning_rate": 1.1504811898512686e-05, "loss": 1.6075, "step": 996 }, { "epoch": 0.43126334856308635, "grad_norm": 23.843276977539062, "learning_rate": 1.1496062992125985e-05, "loss": 1.5715, "step": 997 }, { "epoch": 0.4316959095947444, "grad_norm": 23.46642303466797, "learning_rate": 1.1487314085739283e-05, "loss": 1.5887, "step": 998 }, { "epoch": 0.4321284706264024, "grad_norm": 32.012420654296875, "learning_rate": 1.147856517935258e-05, "loss": 1.5999, "step": 999 }, { "epoch": 0.4325610316580605, "grad_norm": 22.296770095825195, "learning_rate": 1.1469816272965881e-05, "loss": 1.5889, "step": 1000 }, { "epoch": 0.43299359268971854, "grad_norm": 23.486230850219727, "learning_rate": 1.1461067366579179e-05, "loss": 1.7299, "step": 1001 }, { "epoch": 0.4334261537213766, "grad_norm": 21.794849395751953, "learning_rate": 1.1452318460192478e-05, "loss": 1.6118, "step": 1002 }, { "epoch": 0.43385871475303467, "grad_norm": 20.80159568786621, "learning_rate": 1.1443569553805776e-05, "loss": 1.5558, "step": 1003 }, { "epoch": 0.43429127578469273, "grad_norm": 21.411245346069336, "learning_rate": 1.1434820647419073e-05, "loss": 1.5028, "step": 1004 }, { "epoch": 0.4347238368163508, "grad_norm": 20.596715927124023, "learning_rate": 1.1426071741032372e-05, "loss": 1.5536, "step": 1005 }, { "epoch": 0.43515639784800886, "grad_norm": 22.81339454650879, "learning_rate": 1.141732283464567e-05, "loss": 1.5613, "step": 1006 }, { "epoch": 0.4355889588796669, "grad_norm": 20.697265625, "learning_rate": 1.1408573928258967e-05, "loss": 1.5837, "step": 1007 }, { "epoch": 0.436021519911325, "grad_norm": 18.886920928955078, "learning_rate": 1.1399825021872267e-05, "loss": 1.542, "step": 1008 }, { "epoch": 0.43645408094298305, "grad_norm": 22.665058135986328, "learning_rate": 1.1391076115485564e-05, "loss": 1.6007, "step": 1009 }, { "epoch": 0.4368866419746411, "grad_norm": 20.35870361328125, "learning_rate": 1.1382327209098865e-05, "loss": 1.5594, "step": 1010 }, { "epoch": 0.4373192030062992, "grad_norm": 19.82785987854004, "learning_rate": 1.1373578302712163e-05, "loss": 1.6128, "step": 1011 }, { "epoch": 0.43775176403795724, "grad_norm": 21.4642276763916, "learning_rate": 1.136482939632546e-05, "loss": 1.647, "step": 1012 }, { "epoch": 0.4381843250696153, "grad_norm": 24.7077693939209, "learning_rate": 1.135608048993876e-05, "loss": 1.5577, "step": 1013 }, { "epoch": 0.43861688610127336, "grad_norm": 21.480487823486328, "learning_rate": 1.1347331583552057e-05, "loss": 1.5856, "step": 1014 }, { "epoch": 0.4390494471329314, "grad_norm": 24.280683517456055, "learning_rate": 1.1338582677165354e-05, "loss": 1.5539, "step": 1015 }, { "epoch": 0.4394820081645895, "grad_norm": 22.40320587158203, "learning_rate": 1.1329833770778653e-05, "loss": 1.5617, "step": 1016 }, { "epoch": 0.43991456919624755, "grad_norm": 24.9234619140625, "learning_rate": 1.1321084864391951e-05, "loss": 1.5467, "step": 1017 }, { "epoch": 0.4403471302279056, "grad_norm": 20.776636123657227, "learning_rate": 1.131233595800525e-05, "loss": 1.56, "step": 1018 }, { "epoch": 0.4407796912595637, "grad_norm": 20.39991569519043, "learning_rate": 1.1303587051618548e-05, "loss": 1.557, "step": 1019 }, { "epoch": 0.44121225229122174, "grad_norm": 19.826358795166016, "learning_rate": 1.1294838145231849e-05, "loss": 1.6674, "step": 1020 }, { "epoch": 0.44164481332287975, "grad_norm": 19.581579208374023, "learning_rate": 1.1286089238845146e-05, "loss": 1.5441, "step": 1021 }, { "epoch": 0.4420773743545378, "grad_norm": 22.186416625976562, "learning_rate": 1.1277340332458444e-05, "loss": 1.4218, "step": 1022 }, { "epoch": 0.4425099353861959, "grad_norm": 24.520288467407227, "learning_rate": 1.1268591426071743e-05, "loss": 1.6456, "step": 1023 }, { "epoch": 0.44294249641785394, "grad_norm": 18.8167781829834, "learning_rate": 1.125984251968504e-05, "loss": 1.5243, "step": 1024 }, { "epoch": 0.443375057449512, "grad_norm": 20.95041275024414, "learning_rate": 1.1251093613298338e-05, "loss": 1.6067, "step": 1025 }, { "epoch": 0.44380761848117006, "grad_norm": 22.097684860229492, "learning_rate": 1.1242344706911637e-05, "loss": 1.4956, "step": 1026 }, { "epoch": 0.4442401795128281, "grad_norm": 21.358125686645508, "learning_rate": 1.1233595800524935e-05, "loss": 1.623, "step": 1027 }, { "epoch": 0.4446727405444862, "grad_norm": 19.22201919555664, "learning_rate": 1.1224846894138232e-05, "loss": 1.5917, "step": 1028 }, { "epoch": 0.44510530157614425, "grad_norm": 20.375303268432617, "learning_rate": 1.1216097987751533e-05, "loss": 1.658, "step": 1029 }, { "epoch": 0.4455378626078023, "grad_norm": 22.127113342285156, "learning_rate": 1.120734908136483e-05, "loss": 1.5364, "step": 1030 }, { "epoch": 0.4459704236394604, "grad_norm": 23.640071868896484, "learning_rate": 1.119860017497813e-05, "loss": 1.4492, "step": 1031 }, { "epoch": 0.44640298467111844, "grad_norm": 20.21621322631836, "learning_rate": 1.1189851268591427e-05, "loss": 1.5371, "step": 1032 }, { "epoch": 0.4468355457027765, "grad_norm": 18.869565963745117, "learning_rate": 1.1181102362204725e-05, "loss": 1.5825, "step": 1033 }, { "epoch": 0.44726810673443457, "grad_norm": 19.211931228637695, "learning_rate": 1.1172353455818024e-05, "loss": 1.5023, "step": 1034 }, { "epoch": 0.44770066776609263, "grad_norm": 20.949472427368164, "learning_rate": 1.1163604549431321e-05, "loss": 1.662, "step": 1035 }, { "epoch": 0.4481332287977507, "grad_norm": 19.43843650817871, "learning_rate": 1.1154855643044619e-05, "loss": 1.6401, "step": 1036 }, { "epoch": 0.44856578982940876, "grad_norm": 21.74172592163086, "learning_rate": 1.1146106736657918e-05, "loss": 1.6335, "step": 1037 }, { "epoch": 0.4489983508610668, "grad_norm": 23.03577423095703, "learning_rate": 1.1137357830271216e-05, "loss": 1.5985, "step": 1038 }, { "epoch": 0.4494309118927249, "grad_norm": 20.48150634765625, "learning_rate": 1.1128608923884517e-05, "loss": 1.6556, "step": 1039 }, { "epoch": 0.44986347292438295, "grad_norm": 21.354633331298828, "learning_rate": 1.1119860017497814e-05, "loss": 1.5034, "step": 1040 }, { "epoch": 0.450296033956041, "grad_norm": 22.22134017944336, "learning_rate": 1.1111111111111113e-05, "loss": 1.5633, "step": 1041 }, { "epoch": 0.45072859498769907, "grad_norm": 20.304677963256836, "learning_rate": 1.1102362204724411e-05, "loss": 1.5714, "step": 1042 }, { "epoch": 0.4511611560193571, "grad_norm": 19.70705795288086, "learning_rate": 1.1093613298337708e-05, "loss": 1.6236, "step": 1043 }, { "epoch": 0.45159371705101514, "grad_norm": 20.76530647277832, "learning_rate": 1.1084864391951008e-05, "loss": 1.5905, "step": 1044 }, { "epoch": 0.4520262780826732, "grad_norm": 19.45387840270996, "learning_rate": 1.1076115485564305e-05, "loss": 1.5035, "step": 1045 }, { "epoch": 0.45245883911433127, "grad_norm": 19.863208770751953, "learning_rate": 1.1067366579177603e-05, "loss": 1.5407, "step": 1046 }, { "epoch": 0.45289140014598933, "grad_norm": 21.128826141357422, "learning_rate": 1.1058617672790902e-05, "loss": 1.5474, "step": 1047 }, { "epoch": 0.4533239611776474, "grad_norm": 21.49403953552246, "learning_rate": 1.1049868766404201e-05, "loss": 1.6163, "step": 1048 }, { "epoch": 0.45375652220930546, "grad_norm": 23.149555206298828, "learning_rate": 1.10411198600175e-05, "loss": 1.5622, "step": 1049 }, { "epoch": 0.4541890832409635, "grad_norm": 21.647279739379883, "learning_rate": 1.1032370953630798e-05, "loss": 1.6026, "step": 1050 }, { "epoch": 0.4546216442726216, "grad_norm": 21.786270141601562, "learning_rate": 1.1023622047244095e-05, "loss": 1.5098, "step": 1051 }, { "epoch": 0.45505420530427965, "grad_norm": 23.140596389770508, "learning_rate": 1.1014873140857394e-05, "loss": 1.5424, "step": 1052 }, { "epoch": 0.4554867663359377, "grad_norm": 20.646238327026367, "learning_rate": 1.1006124234470692e-05, "loss": 1.5512, "step": 1053 }, { "epoch": 0.4559193273675958, "grad_norm": 21.41969871520996, "learning_rate": 1.099737532808399e-05, "loss": 1.5698, "step": 1054 }, { "epoch": 0.45635188839925384, "grad_norm": 20.478525161743164, "learning_rate": 1.0988626421697289e-05, "loss": 1.5936, "step": 1055 }, { "epoch": 0.4567844494309119, "grad_norm": 21.19014549255371, "learning_rate": 1.0979877515310586e-05, "loss": 1.5705, "step": 1056 }, { "epoch": 0.45721701046256996, "grad_norm": 21.32801055908203, "learning_rate": 1.0971128608923884e-05, "loss": 1.5308, "step": 1057 }, { "epoch": 0.457649571494228, "grad_norm": 21.304298400878906, "learning_rate": 1.0962379702537185e-05, "loss": 1.5296, "step": 1058 }, { "epoch": 0.4580821325258861, "grad_norm": 19.55242156982422, "learning_rate": 1.0953630796150482e-05, "loss": 1.6441, "step": 1059 }, { "epoch": 0.45851469355754415, "grad_norm": 19.598514556884766, "learning_rate": 1.0944881889763781e-05, "loss": 1.665, "step": 1060 }, { "epoch": 0.4589472545892022, "grad_norm": 21.648754119873047, "learning_rate": 1.0936132983377079e-05, "loss": 1.538, "step": 1061 }, { "epoch": 0.4593798156208603, "grad_norm": 20.485759735107422, "learning_rate": 1.0927384076990376e-05, "loss": 1.5405, "step": 1062 }, { "epoch": 0.45981237665251834, "grad_norm": 21.35075569152832, "learning_rate": 1.0918635170603676e-05, "loss": 1.5241, "step": 1063 }, { "epoch": 0.4602449376841764, "grad_norm": 21.848602294921875, "learning_rate": 1.0909886264216973e-05, "loss": 1.5681, "step": 1064 }, { "epoch": 0.4606774987158344, "grad_norm": 23.42026710510254, "learning_rate": 1.0901137357830272e-05, "loss": 1.5778, "step": 1065 }, { "epoch": 0.4611100597474925, "grad_norm": 22.494203567504883, "learning_rate": 1.089238845144357e-05, "loss": 1.6443, "step": 1066 }, { "epoch": 0.46154262077915054, "grad_norm": 20.862600326538086, "learning_rate": 1.0883639545056867e-05, "loss": 1.5278, "step": 1067 }, { "epoch": 0.4619751818108086, "grad_norm": 20.454578399658203, "learning_rate": 1.0874890638670168e-05, "loss": 1.6292, "step": 1068 }, { "epoch": 0.46240774284246666, "grad_norm": 20.307090759277344, "learning_rate": 1.0866141732283466e-05, "loss": 1.665, "step": 1069 }, { "epoch": 0.4628403038741247, "grad_norm": 20.755657196044922, "learning_rate": 1.0857392825896765e-05, "loss": 1.6456, "step": 1070 }, { "epoch": 0.4632728649057828, "grad_norm": 22.096176147460938, "learning_rate": 1.0848643919510062e-05, "loss": 1.5195, "step": 1071 }, { "epoch": 0.46370542593744085, "grad_norm": 23.748300552368164, "learning_rate": 1.083989501312336e-05, "loss": 1.5168, "step": 1072 }, { "epoch": 0.4641379869690989, "grad_norm": 20.105592727661133, "learning_rate": 1.083114610673666e-05, "loss": 1.5407, "step": 1073 }, { "epoch": 0.464570548000757, "grad_norm": 19.950580596923828, "learning_rate": 1.0822397200349957e-05, "loss": 1.588, "step": 1074 }, { "epoch": 0.46500310903241504, "grad_norm": 20.73526954650879, "learning_rate": 1.0813648293963254e-05, "loss": 1.6316, "step": 1075 }, { "epoch": 0.4654356700640731, "grad_norm": 21.673297882080078, "learning_rate": 1.0804899387576553e-05, "loss": 1.5985, "step": 1076 }, { "epoch": 0.46586823109573117, "grad_norm": 22.8228759765625, "learning_rate": 1.0796150481189853e-05, "loss": 1.5836, "step": 1077 }, { "epoch": 0.46630079212738923, "grad_norm": 22.153406143188477, "learning_rate": 1.0787401574803152e-05, "loss": 1.6233, "step": 1078 }, { "epoch": 0.4667333531590473, "grad_norm": 21.604236602783203, "learning_rate": 1.077865266841645e-05, "loss": 1.5601, "step": 1079 }, { "epoch": 0.46716591419070536, "grad_norm": 21.942169189453125, "learning_rate": 1.0769903762029747e-05, "loss": 1.5527, "step": 1080 }, { "epoch": 0.4675984752223634, "grad_norm": 21.710886001586914, "learning_rate": 1.0761154855643046e-05, "loss": 1.5555, "step": 1081 }, { "epoch": 0.4680310362540215, "grad_norm": 21.006336212158203, "learning_rate": 1.0752405949256344e-05, "loss": 1.6011, "step": 1082 }, { "epoch": 0.46846359728567954, "grad_norm": 21.56496810913086, "learning_rate": 1.0743657042869641e-05, "loss": 1.4629, "step": 1083 }, { "epoch": 0.4688961583173376, "grad_norm": 22.86446189880371, "learning_rate": 1.073490813648294e-05, "loss": 1.521, "step": 1084 }, { "epoch": 0.46932871934899567, "grad_norm": 23.123199462890625, "learning_rate": 1.0726159230096238e-05, "loss": 1.5685, "step": 1085 }, { "epoch": 0.46976128038065373, "grad_norm": 22.753873825073242, "learning_rate": 1.0717410323709537e-05, "loss": 1.668, "step": 1086 }, { "epoch": 0.47019384141231174, "grad_norm": 22.325475692749023, "learning_rate": 1.0708661417322836e-05, "loss": 1.4948, "step": 1087 }, { "epoch": 0.4706264024439698, "grad_norm": 21.678844451904297, "learning_rate": 1.0699912510936135e-05, "loss": 1.5436, "step": 1088 }, { "epoch": 0.47105896347562787, "grad_norm": 21.273897171020508, "learning_rate": 1.0691163604549433e-05, "loss": 1.6103, "step": 1089 }, { "epoch": 0.47149152450728593, "grad_norm": 23.457490921020508, "learning_rate": 1.068241469816273e-05, "loss": 1.5893, "step": 1090 }, { "epoch": 0.471924085538944, "grad_norm": 21.083839416503906, "learning_rate": 1.067366579177603e-05, "loss": 1.5555, "step": 1091 }, { "epoch": 0.47235664657060206, "grad_norm": 25.2357120513916, "learning_rate": 1.0664916885389327e-05, "loss": 1.522, "step": 1092 }, { "epoch": 0.4727892076022601, "grad_norm": 23.40666961669922, "learning_rate": 1.0656167979002625e-05, "loss": 1.4986, "step": 1093 }, { "epoch": 0.4732217686339182, "grad_norm": 20.224884033203125, "learning_rate": 1.0647419072615924e-05, "loss": 1.6008, "step": 1094 }, { "epoch": 0.47365432966557625, "grad_norm": 19.231700897216797, "learning_rate": 1.0638670166229221e-05, "loss": 1.5874, "step": 1095 }, { "epoch": 0.4740868906972343, "grad_norm": 21.34067726135254, "learning_rate": 1.0629921259842522e-05, "loss": 1.5708, "step": 1096 }, { "epoch": 0.47451945172889237, "grad_norm": 20.3139705657959, "learning_rate": 1.062117235345582e-05, "loss": 1.5125, "step": 1097 }, { "epoch": 0.47495201276055043, "grad_norm": 25.667116165161133, "learning_rate": 1.0612423447069117e-05, "loss": 1.5303, "step": 1098 }, { "epoch": 0.4753845737922085, "grad_norm": 22.950712203979492, "learning_rate": 1.0603674540682417e-05, "loss": 1.6314, "step": 1099 }, { "epoch": 0.47581713482386656, "grad_norm": 20.83335304260254, "learning_rate": 1.0594925634295714e-05, "loss": 1.5334, "step": 1100 }, { "epoch": 0.4762496958555246, "grad_norm": 19.24706268310547, "learning_rate": 1.0586176727909012e-05, "loss": 1.6256, "step": 1101 }, { "epoch": 0.4766822568871827, "grad_norm": 20.814455032348633, "learning_rate": 1.057742782152231e-05, "loss": 1.5851, "step": 1102 }, { "epoch": 0.47711481791884075, "grad_norm": 21.661409378051758, "learning_rate": 1.0568678915135608e-05, "loss": 1.5247, "step": 1103 }, { "epoch": 0.4775473789504988, "grad_norm": 20.899145126342773, "learning_rate": 1.0559930008748906e-05, "loss": 1.5708, "step": 1104 }, { "epoch": 0.4779799399821569, "grad_norm": 24.386003494262695, "learning_rate": 1.0551181102362205e-05, "loss": 1.5527, "step": 1105 }, { "epoch": 0.47841250101381494, "grad_norm": 25.34111785888672, "learning_rate": 1.0542432195975504e-05, "loss": 1.5851, "step": 1106 }, { "epoch": 0.478845062045473, "grad_norm": 24.537324905395508, "learning_rate": 1.0533683289588803e-05, "loss": 1.596, "step": 1107 }, { "epoch": 0.47927762307713107, "grad_norm": 21.804595947265625, "learning_rate": 1.0524934383202101e-05, "loss": 1.5322, "step": 1108 }, { "epoch": 0.4797101841087891, "grad_norm": 20.191699981689453, "learning_rate": 1.0516185476815398e-05, "loss": 1.5758, "step": 1109 }, { "epoch": 0.48014274514044714, "grad_norm": 21.43497085571289, "learning_rate": 1.0507436570428698e-05, "loss": 1.508, "step": 1110 }, { "epoch": 0.4805753061721052, "grad_norm": 20.207677841186523, "learning_rate": 1.0498687664041995e-05, "loss": 1.6158, "step": 1111 }, { "epoch": 0.48100786720376326, "grad_norm": 19.458316802978516, "learning_rate": 1.0489938757655294e-05, "loss": 1.5859, "step": 1112 }, { "epoch": 0.4814404282354213, "grad_norm": 24.825956344604492, "learning_rate": 1.0481189851268592e-05, "loss": 1.5391, "step": 1113 }, { "epoch": 0.4818729892670794, "grad_norm": 22.562938690185547, "learning_rate": 1.047244094488189e-05, "loss": 1.5431, "step": 1114 }, { "epoch": 0.48230555029873745, "grad_norm": 22.471332550048828, "learning_rate": 1.0463692038495189e-05, "loss": 1.5292, "step": 1115 }, { "epoch": 0.4827381113303955, "grad_norm": 21.53379249572754, "learning_rate": 1.0454943132108488e-05, "loss": 1.5683, "step": 1116 }, { "epoch": 0.4831706723620536, "grad_norm": 19.84793472290039, "learning_rate": 1.0446194225721787e-05, "loss": 1.4873, "step": 1117 }, { "epoch": 0.48360323339371164, "grad_norm": 21.779499053955078, "learning_rate": 1.0437445319335085e-05, "loss": 1.5814, "step": 1118 }, { "epoch": 0.4840357944253697, "grad_norm": 20.556554794311523, "learning_rate": 1.0428696412948382e-05, "loss": 1.5547, "step": 1119 }, { "epoch": 0.48446835545702777, "grad_norm": 20.402372360229492, "learning_rate": 1.0419947506561681e-05, "loss": 1.5654, "step": 1120 }, { "epoch": 0.48490091648868583, "grad_norm": 22.942962646484375, "learning_rate": 1.0411198600174979e-05, "loss": 1.6105, "step": 1121 }, { "epoch": 0.4853334775203439, "grad_norm": 20.789451599121094, "learning_rate": 1.0402449693788276e-05, "loss": 1.5835, "step": 1122 }, { "epoch": 0.48576603855200196, "grad_norm": 20.95735740661621, "learning_rate": 1.0393700787401575e-05, "loss": 1.6086, "step": 1123 }, { "epoch": 0.48619859958366, "grad_norm": 24.3394775390625, "learning_rate": 1.0384951881014873e-05, "loss": 1.5987, "step": 1124 }, { "epoch": 0.4866311606153181, "grad_norm": 19.394668579101562, "learning_rate": 1.0376202974628174e-05, "loss": 1.5567, "step": 1125 }, { "epoch": 0.48706372164697614, "grad_norm": 23.74590492248535, "learning_rate": 1.0367454068241471e-05, "loss": 1.5349, "step": 1126 }, { "epoch": 0.4874962826786342, "grad_norm": 19.871522903442383, "learning_rate": 1.0358705161854769e-05, "loss": 1.5797, "step": 1127 }, { "epoch": 0.48792884371029227, "grad_norm": 20.489187240600586, "learning_rate": 1.0349956255468068e-05, "loss": 1.5805, "step": 1128 }, { "epoch": 0.48836140474195033, "grad_norm": 22.527732849121094, "learning_rate": 1.0341207349081366e-05, "loss": 1.5333, "step": 1129 }, { "epoch": 0.4887939657736084, "grad_norm": 23.267118453979492, "learning_rate": 1.0332458442694663e-05, "loss": 1.5872, "step": 1130 }, { "epoch": 0.4892265268052664, "grad_norm": 21.272428512573242, "learning_rate": 1.0323709536307962e-05, "loss": 1.5837, "step": 1131 }, { "epoch": 0.48965908783692447, "grad_norm": 24.80011558532715, "learning_rate": 1.031496062992126e-05, "loss": 1.6086, "step": 1132 }, { "epoch": 0.49009164886858253, "grad_norm": 21.551164627075195, "learning_rate": 1.0306211723534559e-05, "loss": 1.5909, "step": 1133 }, { "epoch": 0.4905242099002406, "grad_norm": 19.916059494018555, "learning_rate": 1.0297462817147857e-05, "loss": 1.5726, "step": 1134 }, { "epoch": 0.49095677093189866, "grad_norm": 22.31676483154297, "learning_rate": 1.0288713910761157e-05, "loss": 1.5, "step": 1135 }, { "epoch": 0.4913893319635567, "grad_norm": 21.02925682067871, "learning_rate": 1.0279965004374455e-05, "loss": 1.5122, "step": 1136 }, { "epoch": 0.4918218929952148, "grad_norm": 20.82086181640625, "learning_rate": 1.0271216097987753e-05, "loss": 1.5684, "step": 1137 }, { "epoch": 0.49225445402687285, "grad_norm": 20.781131744384766, "learning_rate": 1.0262467191601052e-05, "loss": 1.6631, "step": 1138 }, { "epoch": 0.4926870150585309, "grad_norm": 20.078981399536133, "learning_rate": 1.025371828521435e-05, "loss": 1.5923, "step": 1139 }, { "epoch": 0.49311957609018897, "grad_norm": 21.633108139038086, "learning_rate": 1.0244969378827647e-05, "loss": 1.5812, "step": 1140 }, { "epoch": 0.49355213712184703, "grad_norm": 21.029897689819336, "learning_rate": 1.0236220472440946e-05, "loss": 1.5658, "step": 1141 }, { "epoch": 0.4939846981535051, "grad_norm": 20.9033203125, "learning_rate": 1.0227471566054243e-05, "loss": 1.5775, "step": 1142 }, { "epoch": 0.49441725918516316, "grad_norm": 21.990711212158203, "learning_rate": 1.0218722659667541e-05, "loss": 1.6188, "step": 1143 }, { "epoch": 0.4948498202168212, "grad_norm": 21.208660125732422, "learning_rate": 1.0209973753280842e-05, "loss": 1.5635, "step": 1144 }, { "epoch": 0.4952823812484793, "grad_norm": 23.516277313232422, "learning_rate": 1.020122484689414e-05, "loss": 1.6485, "step": 1145 }, { "epoch": 0.49571494228013735, "grad_norm": 24.508100509643555, "learning_rate": 1.0192475940507439e-05, "loss": 1.5996, "step": 1146 }, { "epoch": 0.4961475033117954, "grad_norm": 23.392810821533203, "learning_rate": 1.0183727034120736e-05, "loss": 1.5059, "step": 1147 }, { "epoch": 0.4965800643434535, "grad_norm": 20.714237213134766, "learning_rate": 1.0174978127734034e-05, "loss": 1.533, "step": 1148 }, { "epoch": 0.49701262537511154, "grad_norm": 23.301856994628906, "learning_rate": 1.0166229221347333e-05, "loss": 1.5304, "step": 1149 }, { "epoch": 0.4974451864067696, "grad_norm": 21.122617721557617, "learning_rate": 1.015748031496063e-05, "loss": 1.4702, "step": 1150 }, { "epoch": 0.49787774743842766, "grad_norm": 24.017070770263672, "learning_rate": 1.0148731408573928e-05, "loss": 1.5159, "step": 1151 }, { "epoch": 0.4983103084700857, "grad_norm": 22.126039505004883, "learning_rate": 1.0139982502187227e-05, "loss": 1.5913, "step": 1152 }, { "epoch": 0.49874286950174374, "grad_norm": 21.73690414428711, "learning_rate": 1.0131233595800525e-05, "loss": 1.625, "step": 1153 }, { "epoch": 0.4991754305334018, "grad_norm": 21.988893508911133, "learning_rate": 1.0122484689413825e-05, "loss": 1.5914, "step": 1154 }, { "epoch": 0.49960799156505986, "grad_norm": 20.745668411254883, "learning_rate": 1.0113735783027123e-05, "loss": 1.5551, "step": 1155 }, { "epoch": 0.500040552596718, "grad_norm": 21.369321823120117, "learning_rate": 1.010498687664042e-05, "loss": 1.5585, "step": 1156 }, { "epoch": 0.500473113628376, "grad_norm": 21.196489334106445, "learning_rate": 1.009623797025372e-05, "loss": 1.4679, "step": 1157 }, { "epoch": 0.5009056746600341, "grad_norm": 20.842437744140625, "learning_rate": 1.0087489063867017e-05, "loss": 1.5539, "step": 1158 }, { "epoch": 0.5013382356916921, "grad_norm": 22.951557159423828, "learning_rate": 1.0078740157480316e-05, "loss": 1.5833, "step": 1159 }, { "epoch": 0.5017707967233502, "grad_norm": 24.61578941345215, "learning_rate": 1.0069991251093614e-05, "loss": 1.4874, "step": 1160 }, { "epoch": 0.5022033577550082, "grad_norm": 20.68556785583496, "learning_rate": 1.0061242344706911e-05, "loss": 1.5937, "step": 1161 }, { "epoch": 0.5026359187866664, "grad_norm": 23.46632957458496, "learning_rate": 1.005249343832021e-05, "loss": 1.46, "step": 1162 }, { "epoch": 0.5030684798183244, "grad_norm": 21.403005599975586, "learning_rate": 1.0043744531933508e-05, "loss": 1.5609, "step": 1163 }, { "epoch": 0.5035010408499824, "grad_norm": 23.224864959716797, "learning_rate": 1.0034995625546809e-05, "loss": 1.5493, "step": 1164 }, { "epoch": 0.5039336018816405, "grad_norm": 22.939931869506836, "learning_rate": 1.0026246719160107e-05, "loss": 1.5497, "step": 1165 }, { "epoch": 0.5043661629132985, "grad_norm": 21.148677825927734, "learning_rate": 1.0017497812773404e-05, "loss": 1.6063, "step": 1166 }, { "epoch": 0.5047987239449566, "grad_norm": 24.93348503112793, "learning_rate": 1.0008748906386703e-05, "loss": 1.4764, "step": 1167 }, { "epoch": 0.5052312849766146, "grad_norm": 22.30760955810547, "learning_rate": 1e-05, "loss": 1.557, "step": 1168 }, { "epoch": 0.5056638460082727, "grad_norm": 23.62921142578125, "learning_rate": 9.991251093613298e-06, "loss": 1.6243, "step": 1169 }, { "epoch": 0.5060964070399308, "grad_norm": 23.39278221130371, "learning_rate": 9.982502187226598e-06, "loss": 1.5429, "step": 1170 }, { "epoch": 0.5065289680715889, "grad_norm": 21.39177703857422, "learning_rate": 9.973753280839897e-06, "loss": 1.5076, "step": 1171 }, { "epoch": 0.5069615291032469, "grad_norm": 20.722728729248047, "learning_rate": 9.965004374453194e-06, "loss": 1.4859, "step": 1172 }, { "epoch": 0.507394090134905, "grad_norm": 21.878719329833984, "learning_rate": 9.956255468066492e-06, "loss": 1.5704, "step": 1173 }, { "epoch": 0.507826651166563, "grad_norm": 21.7126407623291, "learning_rate": 9.947506561679791e-06, "loss": 1.5717, "step": 1174 }, { "epoch": 0.5082592121982211, "grad_norm": 24.65056037902832, "learning_rate": 9.938757655293088e-06, "loss": 1.5916, "step": 1175 }, { "epoch": 0.5086917732298791, "grad_norm": 22.16351318359375, "learning_rate": 9.930008748906388e-06, "loss": 1.6863, "step": 1176 }, { "epoch": 0.5091243342615372, "grad_norm": 21.595354080200195, "learning_rate": 9.921259842519685e-06, "loss": 1.5668, "step": 1177 }, { "epoch": 0.5095568952931953, "grad_norm": 21.123828887939453, "learning_rate": 9.912510936132984e-06, "loss": 1.6015, "step": 1178 }, { "epoch": 0.5099894563248534, "grad_norm": 19.92538833618164, "learning_rate": 9.903762029746282e-06, "loss": 1.5397, "step": 1179 }, { "epoch": 0.5104220173565114, "grad_norm": 19.72816276550293, "learning_rate": 9.895013123359581e-06, "loss": 1.5909, "step": 1180 }, { "epoch": 0.5108545783881695, "grad_norm": 23.017501831054688, "learning_rate": 9.88626421697288e-06, "loss": 1.5516, "step": 1181 }, { "epoch": 0.5112871394198275, "grad_norm": 20.519594192504883, "learning_rate": 9.877515310586178e-06, "loss": 1.6408, "step": 1182 }, { "epoch": 0.5117197004514856, "grad_norm": 23.874664306640625, "learning_rate": 9.868766404199475e-06, "loss": 1.5156, "step": 1183 }, { "epoch": 0.5121522614831436, "grad_norm": 22.534914016723633, "learning_rate": 9.860017497812775e-06, "loss": 1.5178, "step": 1184 }, { "epoch": 0.5125848225148018, "grad_norm": 23.634246826171875, "learning_rate": 9.851268591426074e-06, "loss": 1.5734, "step": 1185 }, { "epoch": 0.5130173835464598, "grad_norm": 21.05173683166504, "learning_rate": 9.842519685039371e-06, "loss": 1.5886, "step": 1186 }, { "epoch": 0.5134499445781178, "grad_norm": 22.97358512878418, "learning_rate": 9.833770778652669e-06, "loss": 1.5493, "step": 1187 }, { "epoch": 0.5138825056097759, "grad_norm": 23.60778045654297, "learning_rate": 9.825021872265968e-06, "loss": 1.5308, "step": 1188 }, { "epoch": 0.5143150666414339, "grad_norm": 19.591341018676758, "learning_rate": 9.816272965879266e-06, "loss": 1.6701, "step": 1189 }, { "epoch": 0.514747627673092, "grad_norm": 22.59847640991211, "learning_rate": 9.807524059492565e-06, "loss": 1.7357, "step": 1190 }, { "epoch": 0.51518018870475, "grad_norm": 21.31041145324707, "learning_rate": 9.798775153105862e-06, "loss": 1.6354, "step": 1191 }, { "epoch": 0.5156127497364081, "grad_norm": 26.626367568969727, "learning_rate": 9.790026246719161e-06, "loss": 1.5627, "step": 1192 }, { "epoch": 0.5160453107680661, "grad_norm": 20.857505798339844, "learning_rate": 9.781277340332459e-06, "loss": 1.5965, "step": 1193 }, { "epoch": 0.5164778717997243, "grad_norm": 20.571149826049805, "learning_rate": 9.772528433945756e-06, "loss": 1.5661, "step": 1194 }, { "epoch": 0.5169104328313823, "grad_norm": 23.989408493041992, "learning_rate": 9.763779527559056e-06, "loss": 1.5563, "step": 1195 }, { "epoch": 0.5173429938630404, "grad_norm": 21.287750244140625, "learning_rate": 9.755030621172355e-06, "loss": 1.5813, "step": 1196 }, { "epoch": 0.5177755548946984, "grad_norm": 19.994047164916992, "learning_rate": 9.746281714785652e-06, "loss": 1.4655, "step": 1197 }, { "epoch": 0.5182081159263565, "grad_norm": 22.351661682128906, "learning_rate": 9.73753280839895e-06, "loss": 1.5156, "step": 1198 }, { "epoch": 0.5186406769580145, "grad_norm": 21.73786163330078, "learning_rate": 9.728783902012249e-06, "loss": 1.5882, "step": 1199 }, { "epoch": 0.5190732379896726, "grad_norm": 24.997068405151367, "learning_rate": 9.720034995625548e-06, "loss": 1.5392, "step": 1200 }, { "epoch": 0.5195057990213306, "grad_norm": 24.620372772216797, "learning_rate": 9.711286089238846e-06, "loss": 1.6197, "step": 1201 }, { "epoch": 0.5199383600529888, "grad_norm": 20.77781105041504, "learning_rate": 9.702537182852143e-06, "loss": 1.5079, "step": 1202 }, { "epoch": 0.5203709210846468, "grad_norm": 23.355987548828125, "learning_rate": 9.693788276465443e-06, "loss": 1.5842, "step": 1203 }, { "epoch": 0.5208034821163049, "grad_norm": 19.493812561035156, "learning_rate": 9.68503937007874e-06, "loss": 1.4915, "step": 1204 }, { "epoch": 0.5212360431479629, "grad_norm": 20.286083221435547, "learning_rate": 9.67629046369204e-06, "loss": 1.5201, "step": 1205 }, { "epoch": 0.521668604179621, "grad_norm": 19.314844131469727, "learning_rate": 9.667541557305338e-06, "loss": 1.6382, "step": 1206 }, { "epoch": 0.522101165211279, "grad_norm": 24.922771453857422, "learning_rate": 9.658792650918636e-06, "loss": 1.5938, "step": 1207 }, { "epoch": 0.522533726242937, "grad_norm": 23.530563354492188, "learning_rate": 9.650043744531934e-06, "loss": 1.5612, "step": 1208 }, { "epoch": 0.5229662872745952, "grad_norm": 23.83565902709961, "learning_rate": 9.641294838145233e-06, "loss": 1.6478, "step": 1209 }, { "epoch": 0.5233988483062532, "grad_norm": 22.341461181640625, "learning_rate": 9.632545931758532e-06, "loss": 1.5618, "step": 1210 }, { "epoch": 0.5238314093379113, "grad_norm": 20.392044067382812, "learning_rate": 9.62379702537183e-06, "loss": 1.5519, "step": 1211 }, { "epoch": 0.5242639703695693, "grad_norm": 22.426546096801758, "learning_rate": 9.615048118985127e-06, "loss": 1.5484, "step": 1212 }, { "epoch": 0.5246965314012274, "grad_norm": 23.25884437561035, "learning_rate": 9.606299212598426e-06, "loss": 1.5351, "step": 1213 }, { "epoch": 0.5251290924328854, "grad_norm": 20.078369140625, "learning_rate": 9.597550306211725e-06, "loss": 1.5788, "step": 1214 }, { "epoch": 0.5255616534645435, "grad_norm": 21.024873733520508, "learning_rate": 9.588801399825023e-06, "loss": 1.5521, "step": 1215 }, { "epoch": 0.5259942144962015, "grad_norm": 21.520021438598633, "learning_rate": 9.58005249343832e-06, "loss": 1.5283, "step": 1216 }, { "epoch": 0.5264267755278597, "grad_norm": 23.06103515625, "learning_rate": 9.57130358705162e-06, "loss": 1.5912, "step": 1217 }, { "epoch": 0.5268593365595177, "grad_norm": 23.135066986083984, "learning_rate": 9.562554680664917e-06, "loss": 1.5659, "step": 1218 }, { "epoch": 0.5272918975911758, "grad_norm": 22.494586944580078, "learning_rate": 9.553805774278216e-06, "loss": 1.5495, "step": 1219 }, { "epoch": 0.5277244586228338, "grad_norm": 24.494667053222656, "learning_rate": 9.545056867891514e-06, "loss": 1.4783, "step": 1220 }, { "epoch": 0.5281570196544919, "grad_norm": 22.232500076293945, "learning_rate": 9.536307961504813e-06, "loss": 1.6506, "step": 1221 }, { "epoch": 0.5285895806861499, "grad_norm": 20.534900665283203, "learning_rate": 9.52755905511811e-06, "loss": 1.4604, "step": 1222 }, { "epoch": 0.529022141717808, "grad_norm": 22.456693649291992, "learning_rate": 9.518810148731408e-06, "loss": 1.5402, "step": 1223 }, { "epoch": 0.529454702749466, "grad_norm": 24.285858154296875, "learning_rate": 9.510061242344707e-06, "loss": 1.6183, "step": 1224 }, { "epoch": 0.5298872637811242, "grad_norm": 24.39087677001953, "learning_rate": 9.501312335958006e-06, "loss": 1.5955, "step": 1225 }, { "epoch": 0.5303198248127822, "grad_norm": 23.711259841918945, "learning_rate": 9.492563429571304e-06, "loss": 1.4826, "step": 1226 }, { "epoch": 0.5307523858444403, "grad_norm": 23.221359252929688, "learning_rate": 9.483814523184603e-06, "loss": 1.541, "step": 1227 }, { "epoch": 0.5311849468760983, "grad_norm": 22.01033592224121, "learning_rate": 9.4750656167979e-06, "loss": 1.5117, "step": 1228 }, { "epoch": 0.5316175079077564, "grad_norm": 22.62164306640625, "learning_rate": 9.4663167104112e-06, "loss": 1.6338, "step": 1229 }, { "epoch": 0.5320500689394144, "grad_norm": 25.81968879699707, "learning_rate": 9.457567804024497e-06, "loss": 1.5945, "step": 1230 }, { "epoch": 0.5324826299710724, "grad_norm": 22.638782501220703, "learning_rate": 9.448818897637797e-06, "loss": 1.5698, "step": 1231 }, { "epoch": 0.5329151910027305, "grad_norm": 22.303646087646484, "learning_rate": 9.440069991251094e-06, "loss": 1.5867, "step": 1232 }, { "epoch": 0.5333477520343886, "grad_norm": 22.231616973876953, "learning_rate": 9.431321084864393e-06, "loss": 1.5598, "step": 1233 }, { "epoch": 0.5337803130660467, "grad_norm": 21.435476303100586, "learning_rate": 9.422572178477691e-06, "loss": 1.5487, "step": 1234 }, { "epoch": 0.5342128740977047, "grad_norm": 20.271909713745117, "learning_rate": 9.41382327209099e-06, "loss": 1.564, "step": 1235 }, { "epoch": 0.5346454351293628, "grad_norm": 21.8601016998291, "learning_rate": 9.405074365704288e-06, "loss": 1.5758, "step": 1236 }, { "epoch": 0.5350779961610208, "grad_norm": 18.850753784179688, "learning_rate": 9.396325459317585e-06, "loss": 1.6328, "step": 1237 }, { "epoch": 0.5355105571926789, "grad_norm": 20.55095672607422, "learning_rate": 9.387576552930884e-06, "loss": 1.5706, "step": 1238 }, { "epoch": 0.5359431182243369, "grad_norm": 22.207855224609375, "learning_rate": 9.378827646544184e-06, "loss": 1.5461, "step": 1239 }, { "epoch": 0.536375679255995, "grad_norm": 22.02487564086914, "learning_rate": 9.370078740157481e-06, "loss": 1.5796, "step": 1240 }, { "epoch": 0.5368082402876531, "grad_norm": 22.791046142578125, "learning_rate": 9.361329833770779e-06, "loss": 1.5041, "step": 1241 }, { "epoch": 0.5372408013193112, "grad_norm": 21.210519790649414, "learning_rate": 9.352580927384078e-06, "loss": 1.541, "step": 1242 }, { "epoch": 0.5376733623509692, "grad_norm": 23.86467933654785, "learning_rate": 9.343832020997377e-06, "loss": 1.5611, "step": 1243 }, { "epoch": 0.5381059233826273, "grad_norm": 26.53850555419922, "learning_rate": 9.335083114610674e-06, "loss": 1.4797, "step": 1244 }, { "epoch": 0.5385384844142853, "grad_norm": 22.709075927734375, "learning_rate": 9.326334208223972e-06, "loss": 1.5384, "step": 1245 }, { "epoch": 0.5389710454459434, "grad_norm": 22.626386642456055, "learning_rate": 9.317585301837271e-06, "loss": 1.533, "step": 1246 }, { "epoch": 0.5394036064776014, "grad_norm": 22.51513671875, "learning_rate": 9.308836395450569e-06, "loss": 1.4891, "step": 1247 }, { "epoch": 0.5398361675092596, "grad_norm": 25.217557907104492, "learning_rate": 9.300087489063868e-06, "loss": 1.5106, "step": 1248 }, { "epoch": 0.5402687285409176, "grad_norm": 21.37566566467285, "learning_rate": 9.291338582677165e-06, "loss": 1.5115, "step": 1249 }, { "epoch": 0.5407012895725757, "grad_norm": 21.221803665161133, "learning_rate": 9.282589676290465e-06, "loss": 1.6049, "step": 1250 }, { "epoch": 0.5411338506042337, "grad_norm": 22.976816177368164, "learning_rate": 9.273840769903762e-06, "loss": 1.4837, "step": 1251 }, { "epoch": 0.5415664116358917, "grad_norm": 20.9698543548584, "learning_rate": 9.265091863517061e-06, "loss": 1.5605, "step": 1252 }, { "epoch": 0.5419989726675498, "grad_norm": 24.19113540649414, "learning_rate": 9.25634295713036e-06, "loss": 1.5536, "step": 1253 }, { "epoch": 0.5424315336992078, "grad_norm": 21.84588623046875, "learning_rate": 9.247594050743658e-06, "loss": 1.4345, "step": 1254 }, { "epoch": 0.5428640947308659, "grad_norm": 23.569164276123047, "learning_rate": 9.238845144356956e-06, "loss": 1.595, "step": 1255 }, { "epoch": 0.543296655762524, "grad_norm": 24.85906982421875, "learning_rate": 9.230096237970255e-06, "loss": 1.5155, "step": 1256 }, { "epoch": 0.5437292167941821, "grad_norm": 19.473554611206055, "learning_rate": 9.221347331583554e-06, "loss": 1.5188, "step": 1257 }, { "epoch": 0.5441617778258401, "grad_norm": 23.21884536743164, "learning_rate": 9.212598425196852e-06, "loss": 1.506, "step": 1258 }, { "epoch": 0.5445943388574982, "grad_norm": 24.165271759033203, "learning_rate": 9.203849518810149e-06, "loss": 1.6034, "step": 1259 }, { "epoch": 0.5450268998891562, "grad_norm": 22.93230628967285, "learning_rate": 9.195100612423448e-06, "loss": 1.5442, "step": 1260 }, { "epoch": 0.5454594609208143, "grad_norm": 24.058223724365234, "learning_rate": 9.186351706036746e-06, "loss": 1.5162, "step": 1261 }, { "epoch": 0.5458920219524723, "grad_norm": 22.84027862548828, "learning_rate": 9.177602799650045e-06, "loss": 1.4725, "step": 1262 }, { "epoch": 0.5463245829841304, "grad_norm": 20.591529846191406, "learning_rate": 9.168853893263342e-06, "loss": 1.5473, "step": 1263 }, { "epoch": 0.5467571440157885, "grad_norm": 23.851757049560547, "learning_rate": 9.160104986876642e-06, "loss": 1.664, "step": 1264 }, { "epoch": 0.5471897050474466, "grad_norm": 21.493534088134766, "learning_rate": 9.15135608048994e-06, "loss": 1.5328, "step": 1265 }, { "epoch": 0.5476222660791046, "grad_norm": 20.198074340820312, "learning_rate": 9.142607174103237e-06, "loss": 1.5544, "step": 1266 }, { "epoch": 0.5480548271107627, "grad_norm": 22.380517959594727, "learning_rate": 9.133858267716536e-06, "loss": 1.4572, "step": 1267 }, { "epoch": 0.5484873881424207, "grad_norm": 22.481168746948242, "learning_rate": 9.125109361329835e-06, "loss": 1.5962, "step": 1268 }, { "epoch": 0.5489199491740788, "grad_norm": 22.247299194335938, "learning_rate": 9.116360454943133e-06, "loss": 1.5528, "step": 1269 }, { "epoch": 0.5493525102057368, "grad_norm": 19.119709014892578, "learning_rate": 9.10761154855643e-06, "loss": 1.4795, "step": 1270 }, { "epoch": 0.549785071237395, "grad_norm": 23.676963806152344, "learning_rate": 9.09886264216973e-06, "loss": 1.6428, "step": 1271 }, { "epoch": 0.550217632269053, "grad_norm": 22.227872848510742, "learning_rate": 9.090113735783029e-06, "loss": 1.5312, "step": 1272 }, { "epoch": 0.5506501933007111, "grad_norm": 22.329296112060547, "learning_rate": 9.081364829396326e-06, "loss": 1.5003, "step": 1273 }, { "epoch": 0.5510827543323691, "grad_norm": 24.662952423095703, "learning_rate": 9.072615923009625e-06, "loss": 1.5753, "step": 1274 }, { "epoch": 0.5515153153640271, "grad_norm": 23.11702537536621, "learning_rate": 9.063867016622923e-06, "loss": 1.535, "step": 1275 }, { "epoch": 0.5519478763956852, "grad_norm": 21.952640533447266, "learning_rate": 9.05511811023622e-06, "loss": 1.6321, "step": 1276 }, { "epoch": 0.5523804374273432, "grad_norm": 22.44942283630371, "learning_rate": 9.04636920384952e-06, "loss": 1.5539, "step": 1277 }, { "epoch": 0.5528129984590013, "grad_norm": 21.685388565063477, "learning_rate": 9.037620297462819e-06, "loss": 1.6138, "step": 1278 }, { "epoch": 0.5532455594906593, "grad_norm": 22.420637130737305, "learning_rate": 9.028871391076116e-06, "loss": 1.5373, "step": 1279 }, { "epoch": 0.5536781205223175, "grad_norm": 23.004444122314453, "learning_rate": 9.020122484689414e-06, "loss": 1.5647, "step": 1280 }, { "epoch": 0.5541106815539755, "grad_norm": 22.256057739257812, "learning_rate": 9.011373578302713e-06, "loss": 1.4894, "step": 1281 }, { "epoch": 0.5545432425856336, "grad_norm": 26.35243034362793, "learning_rate": 9.002624671916012e-06, "loss": 1.4642, "step": 1282 }, { "epoch": 0.5549758036172916, "grad_norm": 22.870492935180664, "learning_rate": 8.99387576552931e-06, "loss": 1.4622, "step": 1283 }, { "epoch": 0.5554083646489497, "grad_norm": 21.987051010131836, "learning_rate": 8.985126859142607e-06, "loss": 1.598, "step": 1284 }, { "epoch": 0.5558409256806077, "grad_norm": 21.95711898803711, "learning_rate": 8.976377952755906e-06, "loss": 1.557, "step": 1285 }, { "epoch": 0.5562734867122658, "grad_norm": 22.57794189453125, "learning_rate": 8.967629046369206e-06, "loss": 1.5029, "step": 1286 }, { "epoch": 0.5567060477439238, "grad_norm": 21.112510681152344, "learning_rate": 8.958880139982503e-06, "loss": 1.5741, "step": 1287 }, { "epoch": 0.557138608775582, "grad_norm": 24.124717712402344, "learning_rate": 8.9501312335958e-06, "loss": 1.613, "step": 1288 }, { "epoch": 0.55757116980724, "grad_norm": 31.32720375061035, "learning_rate": 8.9413823272091e-06, "loss": 1.6521, "step": 1289 }, { "epoch": 0.5580037308388981, "grad_norm": 23.76942253112793, "learning_rate": 8.932633420822397e-06, "loss": 1.5079, "step": 1290 }, { "epoch": 0.5584362918705561, "grad_norm": 22.088144302368164, "learning_rate": 8.923884514435697e-06, "loss": 1.5162, "step": 1291 }, { "epoch": 0.5588688529022142, "grad_norm": 21.93614387512207, "learning_rate": 8.915135608048994e-06, "loss": 1.4684, "step": 1292 }, { "epoch": 0.5593014139338722, "grad_norm": 23.71426773071289, "learning_rate": 8.906386701662293e-06, "loss": 1.6234, "step": 1293 }, { "epoch": 0.5597339749655303, "grad_norm": 23.15918731689453, "learning_rate": 8.89763779527559e-06, "loss": 1.5453, "step": 1294 }, { "epoch": 0.5601665359971884, "grad_norm": 24.360671997070312, "learning_rate": 8.888888888888888e-06, "loss": 1.5688, "step": 1295 }, { "epoch": 0.5605990970288464, "grad_norm": 22.142484664916992, "learning_rate": 8.880139982502188e-06, "loss": 1.5051, "step": 1296 }, { "epoch": 0.5610316580605045, "grad_norm": 23.395851135253906, "learning_rate": 8.871391076115487e-06, "loss": 1.6383, "step": 1297 }, { "epoch": 0.5614642190921625, "grad_norm": 22.678421020507812, "learning_rate": 8.862642169728784e-06, "loss": 1.5335, "step": 1298 }, { "epoch": 0.5618967801238206, "grad_norm": 23.081527709960938, "learning_rate": 8.853893263342083e-06, "loss": 1.6206, "step": 1299 }, { "epoch": 0.5623293411554786, "grad_norm": 22.97568702697754, "learning_rate": 8.845144356955381e-06, "loss": 1.4891, "step": 1300 }, { "epoch": 0.5627619021871367, "grad_norm": 22.3316707611084, "learning_rate": 8.83639545056868e-06, "loss": 1.6181, "step": 1301 }, { "epoch": 0.5631944632187947, "grad_norm": 21.807104110717773, "learning_rate": 8.827646544181978e-06, "loss": 1.5856, "step": 1302 }, { "epoch": 0.5636270242504529, "grad_norm": 23.432767868041992, "learning_rate": 8.818897637795277e-06, "loss": 1.5661, "step": 1303 }, { "epoch": 0.5640595852821109, "grad_norm": 23.371694564819336, "learning_rate": 8.810148731408574e-06, "loss": 1.5358, "step": 1304 }, { "epoch": 0.564492146313769, "grad_norm": 23.72265625, "learning_rate": 8.801399825021874e-06, "loss": 1.5152, "step": 1305 }, { "epoch": 0.564924707345427, "grad_norm": 20.524398803710938, "learning_rate": 8.792650918635171e-06, "loss": 1.4969, "step": 1306 }, { "epoch": 0.5653572683770851, "grad_norm": 24.223081588745117, "learning_rate": 8.78390201224847e-06, "loss": 1.5267, "step": 1307 }, { "epoch": 0.5657898294087431, "grad_norm": 21.053010940551758, "learning_rate": 8.775153105861768e-06, "loss": 1.5373, "step": 1308 }, { "epoch": 0.5662223904404012, "grad_norm": 24.919174194335938, "learning_rate": 8.766404199475065e-06, "loss": 1.5295, "step": 1309 }, { "epoch": 0.5666549514720592, "grad_norm": 20.551292419433594, "learning_rate": 8.757655293088365e-06, "loss": 1.5628, "step": 1310 }, { "epoch": 0.5670875125037174, "grad_norm": 22.388694763183594, "learning_rate": 8.748906386701664e-06, "loss": 1.5315, "step": 1311 }, { "epoch": 0.5675200735353754, "grad_norm": 22.377506256103516, "learning_rate": 8.740157480314961e-06, "loss": 1.5567, "step": 1312 }, { "epoch": 0.5679526345670335, "grad_norm": 21.797183990478516, "learning_rate": 8.731408573928259e-06, "loss": 1.524, "step": 1313 }, { "epoch": 0.5683851955986915, "grad_norm": 25.161802291870117, "learning_rate": 8.722659667541558e-06, "loss": 1.5708, "step": 1314 }, { "epoch": 0.5688177566303496, "grad_norm": 22.526390075683594, "learning_rate": 8.713910761154857e-06, "loss": 1.543, "step": 1315 }, { "epoch": 0.5692503176620076, "grad_norm": 22.93593978881836, "learning_rate": 8.705161854768155e-06, "loss": 1.5886, "step": 1316 }, { "epoch": 0.5696828786936657, "grad_norm": 21.80086326599121, "learning_rate": 8.696412948381452e-06, "loss": 1.5182, "step": 1317 }, { "epoch": 0.5701154397253237, "grad_norm": 26.52342987060547, "learning_rate": 8.687664041994751e-06, "loss": 1.5266, "step": 1318 }, { "epoch": 0.5705480007569818, "grad_norm": 23.737064361572266, "learning_rate": 8.678915135608049e-06, "loss": 1.5446, "step": 1319 }, { "epoch": 0.5709805617886399, "grad_norm": 23.06212615966797, "learning_rate": 8.670166229221348e-06, "loss": 1.5367, "step": 1320 }, { "epoch": 0.5714131228202979, "grad_norm": 22.22346305847168, "learning_rate": 8.661417322834647e-06, "loss": 1.5132, "step": 1321 }, { "epoch": 0.571845683851956, "grad_norm": 23.046924591064453, "learning_rate": 8.652668416447945e-06, "loss": 1.6097, "step": 1322 }, { "epoch": 0.572278244883614, "grad_norm": 23.361909866333008, "learning_rate": 8.643919510061242e-06, "loss": 1.5469, "step": 1323 }, { "epoch": 0.5727108059152721, "grad_norm": 23.01518440246582, "learning_rate": 8.635170603674542e-06, "loss": 1.5453, "step": 1324 }, { "epoch": 0.5731433669469301, "grad_norm": 22.828317642211914, "learning_rate": 8.62642169728784e-06, "loss": 1.4921, "step": 1325 }, { "epoch": 0.5735759279785883, "grad_norm": 27.066354751586914, "learning_rate": 8.617672790901138e-06, "loss": 1.4901, "step": 1326 }, { "epoch": 0.5740084890102463, "grad_norm": 23.641551971435547, "learning_rate": 8.608923884514436e-06, "loss": 1.5495, "step": 1327 }, { "epoch": 0.5744410500419044, "grad_norm": 22.956571578979492, "learning_rate": 8.600174978127735e-06, "loss": 1.5145, "step": 1328 }, { "epoch": 0.5748736110735624, "grad_norm": 21.42229461669922, "learning_rate": 8.591426071741034e-06, "loss": 1.4778, "step": 1329 }, { "epoch": 0.5753061721052205, "grad_norm": 23.688533782958984, "learning_rate": 8.582677165354332e-06, "loss": 1.4983, "step": 1330 }, { "epoch": 0.5757387331368785, "grad_norm": 24.6751708984375, "learning_rate": 8.57392825896763e-06, "loss": 1.4807, "step": 1331 }, { "epoch": 0.5761712941685366, "grad_norm": 25.89187240600586, "learning_rate": 8.565179352580928e-06, "loss": 1.6793, "step": 1332 }, { "epoch": 0.5766038552001946, "grad_norm": 21.923139572143555, "learning_rate": 8.556430446194226e-06, "loss": 1.4787, "step": 1333 }, { "epoch": 0.5770364162318528, "grad_norm": 27.02901840209961, "learning_rate": 8.547681539807525e-06, "loss": 1.5807, "step": 1334 }, { "epoch": 0.5774689772635108, "grad_norm": 24.652677536010742, "learning_rate": 8.538932633420823e-06, "loss": 1.5291, "step": 1335 }, { "epoch": 0.5779015382951689, "grad_norm": 22.28380584716797, "learning_rate": 8.530183727034122e-06, "loss": 1.5341, "step": 1336 }, { "epoch": 0.5783340993268269, "grad_norm": 27.593717575073242, "learning_rate": 8.52143482064742e-06, "loss": 1.5058, "step": 1337 }, { "epoch": 0.578766660358485, "grad_norm": 23.606679916381836, "learning_rate": 8.512685914260717e-06, "loss": 1.5453, "step": 1338 }, { "epoch": 0.579199221390143, "grad_norm": 19.93859100341797, "learning_rate": 8.503937007874016e-06, "loss": 1.6098, "step": 1339 }, { "epoch": 0.579631782421801, "grad_norm": 21.376535415649414, "learning_rate": 8.495188101487315e-06, "loss": 1.6096, "step": 1340 }, { "epoch": 0.5800643434534591, "grad_norm": 22.244482040405273, "learning_rate": 8.486439195100613e-06, "loss": 1.5739, "step": 1341 }, { "epoch": 0.5804969044851171, "grad_norm": 21.95546531677246, "learning_rate": 8.47769028871391e-06, "loss": 1.5556, "step": 1342 }, { "epoch": 0.5809294655167753, "grad_norm": 23.818946838378906, "learning_rate": 8.46894138232721e-06, "loss": 1.6165, "step": 1343 }, { "epoch": 0.5813620265484333, "grad_norm": 24.244600296020508, "learning_rate": 8.460192475940509e-06, "loss": 1.5368, "step": 1344 }, { "epoch": 0.5817945875800914, "grad_norm": 23.75138282775879, "learning_rate": 8.451443569553806e-06, "loss": 1.5739, "step": 1345 }, { "epoch": 0.5822271486117494, "grad_norm": 21.759607315063477, "learning_rate": 8.442694663167106e-06, "loss": 1.5693, "step": 1346 }, { "epoch": 0.5826597096434075, "grad_norm": 21.369449615478516, "learning_rate": 8.433945756780403e-06, "loss": 1.509, "step": 1347 }, { "epoch": 0.5830922706750655, "grad_norm": 21.341304779052734, "learning_rate": 8.4251968503937e-06, "loss": 1.5313, "step": 1348 }, { "epoch": 0.5835248317067236, "grad_norm": 21.93410873413086, "learning_rate": 8.416447944007e-06, "loss": 1.4952, "step": 1349 }, { "epoch": 0.5839573927383817, "grad_norm": 21.560096740722656, "learning_rate": 8.407699037620299e-06, "loss": 1.587, "step": 1350 }, { "epoch": 0.5843899537700398, "grad_norm": 22.302555084228516, "learning_rate": 8.398950131233596e-06, "loss": 1.4969, "step": 1351 }, { "epoch": 0.5848225148016978, "grad_norm": 24.07124137878418, "learning_rate": 8.390201224846894e-06, "loss": 1.6227, "step": 1352 }, { "epoch": 0.5852550758333559, "grad_norm": 21.081422805786133, "learning_rate": 8.381452318460193e-06, "loss": 1.5545, "step": 1353 }, { "epoch": 0.5856876368650139, "grad_norm": 20.232650756835938, "learning_rate": 8.372703412073492e-06, "loss": 1.5344, "step": 1354 }, { "epoch": 0.586120197896672, "grad_norm": 21.190690994262695, "learning_rate": 8.36395450568679e-06, "loss": 1.6478, "step": 1355 }, { "epoch": 0.58655275892833, "grad_norm": 21.58733367919922, "learning_rate": 8.355205599300087e-06, "loss": 1.577, "step": 1356 }, { "epoch": 0.5869853199599881, "grad_norm": 21.453083038330078, "learning_rate": 8.346456692913387e-06, "loss": 1.5533, "step": 1357 }, { "epoch": 0.5874178809916462, "grad_norm": 20.776641845703125, "learning_rate": 8.337707786526686e-06, "loss": 1.5206, "step": 1358 }, { "epoch": 0.5878504420233043, "grad_norm": 24.146244049072266, "learning_rate": 8.328958880139983e-06, "loss": 1.5626, "step": 1359 }, { "epoch": 0.5882830030549623, "grad_norm": 19.88344383239746, "learning_rate": 8.320209973753281e-06, "loss": 1.5167, "step": 1360 }, { "epoch": 0.5887155640866204, "grad_norm": 21.336252212524414, "learning_rate": 8.31146106736658e-06, "loss": 1.5685, "step": 1361 }, { "epoch": 0.5891481251182784, "grad_norm": 23.71305274963379, "learning_rate": 8.302712160979878e-06, "loss": 1.5559, "step": 1362 }, { "epoch": 0.5895806861499364, "grad_norm": 24.64626693725586, "learning_rate": 8.293963254593177e-06, "loss": 1.5773, "step": 1363 }, { "epoch": 0.5900132471815945, "grad_norm": 24.751689910888672, "learning_rate": 8.285214348206474e-06, "loss": 1.5788, "step": 1364 }, { "epoch": 0.5904458082132525, "grad_norm": 22.55054473876953, "learning_rate": 8.276465441819773e-06, "loss": 1.6049, "step": 1365 }, { "epoch": 0.5908783692449107, "grad_norm": 23.089393615722656, "learning_rate": 8.267716535433071e-06, "loss": 1.5444, "step": 1366 }, { "epoch": 0.5913109302765687, "grad_norm": 20.179182052612305, "learning_rate": 8.25896762904637e-06, "loss": 1.5167, "step": 1367 }, { "epoch": 0.5917434913082268, "grad_norm": 22.71483039855957, "learning_rate": 8.25021872265967e-06, "loss": 1.5535, "step": 1368 }, { "epoch": 0.5921760523398848, "grad_norm": 23.300128936767578, "learning_rate": 8.241469816272967e-06, "loss": 1.5337, "step": 1369 }, { "epoch": 0.5926086133715429, "grad_norm": 23.242700576782227, "learning_rate": 8.232720909886264e-06, "loss": 1.5624, "step": 1370 }, { "epoch": 0.5930411744032009, "grad_norm": 22.0483341217041, "learning_rate": 8.223972003499564e-06, "loss": 1.5575, "step": 1371 }, { "epoch": 0.593473735434859, "grad_norm": 24.035541534423828, "learning_rate": 8.215223097112861e-06, "loss": 1.4662, "step": 1372 }, { "epoch": 0.593906296466517, "grad_norm": 21.212360382080078, "learning_rate": 8.20647419072616e-06, "loss": 1.5534, "step": 1373 }, { "epoch": 0.5943388574981752, "grad_norm": 23.624034881591797, "learning_rate": 8.197725284339458e-06, "loss": 1.5738, "step": 1374 }, { "epoch": 0.5947714185298332, "grad_norm": 23.664756774902344, "learning_rate": 8.188976377952757e-06, "loss": 1.5385, "step": 1375 }, { "epoch": 0.5952039795614913, "grad_norm": 23.286773681640625, "learning_rate": 8.180227471566055e-06, "loss": 1.5215, "step": 1376 }, { "epoch": 0.5956365405931493, "grad_norm": 24.892528533935547, "learning_rate": 8.171478565179354e-06, "loss": 1.471, "step": 1377 }, { "epoch": 0.5960691016248074, "grad_norm": 25.03875160217285, "learning_rate": 8.162729658792651e-06, "loss": 1.6867, "step": 1378 }, { "epoch": 0.5965016626564654, "grad_norm": 24.291664123535156, "learning_rate": 8.15398075240595e-06, "loss": 1.5568, "step": 1379 }, { "epoch": 0.5969342236881235, "grad_norm": 21.74358367919922, "learning_rate": 8.145231846019248e-06, "loss": 1.4808, "step": 1380 }, { "epoch": 0.5973667847197816, "grad_norm": 20.80985450744629, "learning_rate": 8.136482939632546e-06, "loss": 1.6065, "step": 1381 }, { "epoch": 0.5977993457514397, "grad_norm": 23.85978126525879, "learning_rate": 8.127734033245845e-06, "loss": 1.5018, "step": 1382 }, { "epoch": 0.5982319067830977, "grad_norm": 23.80008316040039, "learning_rate": 8.118985126859144e-06, "loss": 1.4797, "step": 1383 }, { "epoch": 0.5986644678147557, "grad_norm": 21.72329330444336, "learning_rate": 8.110236220472441e-06, "loss": 1.5553, "step": 1384 }, { "epoch": 0.5990970288464138, "grad_norm": 23.285947799682617, "learning_rate": 8.101487314085739e-06, "loss": 1.4434, "step": 1385 }, { "epoch": 0.5995295898780718, "grad_norm": 24.382734298706055, "learning_rate": 8.092738407699038e-06, "loss": 1.6215, "step": 1386 }, { "epoch": 0.5999621509097299, "grad_norm": 22.842208862304688, "learning_rate": 8.083989501312337e-06, "loss": 1.5772, "step": 1387 }, { "epoch": 0.6003947119413879, "grad_norm": 22.464563369750977, "learning_rate": 8.075240594925635e-06, "loss": 1.5454, "step": 1388 }, { "epoch": 0.6008272729730461, "grad_norm": 23.2418155670166, "learning_rate": 8.066491688538932e-06, "loss": 1.4773, "step": 1389 }, { "epoch": 0.6012598340047041, "grad_norm": 24.3996639251709, "learning_rate": 8.057742782152232e-06, "loss": 1.4524, "step": 1390 }, { "epoch": 0.6016923950363622, "grad_norm": 21.880083084106445, "learning_rate": 8.04899387576553e-06, "loss": 1.5388, "step": 1391 }, { "epoch": 0.6021249560680202, "grad_norm": 21.42142105102539, "learning_rate": 8.040244969378828e-06, "loss": 1.5757, "step": 1392 }, { "epoch": 0.6025575170996783, "grad_norm": 24.134017944335938, "learning_rate": 8.031496062992128e-06, "loss": 1.5229, "step": 1393 }, { "epoch": 0.6029900781313363, "grad_norm": 23.24073600769043, "learning_rate": 8.022747156605425e-06, "loss": 1.5801, "step": 1394 }, { "epoch": 0.6034226391629944, "grad_norm": 20.904178619384766, "learning_rate": 8.013998250218723e-06, "loss": 1.5884, "step": 1395 }, { "epoch": 0.6038552001946524, "grad_norm": 23.131088256835938, "learning_rate": 8.005249343832022e-06, "loss": 1.489, "step": 1396 }, { "epoch": 0.6042877612263106, "grad_norm": 22.88324546813965, "learning_rate": 7.996500437445321e-06, "loss": 1.5062, "step": 1397 }, { "epoch": 0.6047203222579686, "grad_norm": 22.445697784423828, "learning_rate": 7.987751531058619e-06, "loss": 1.5516, "step": 1398 }, { "epoch": 0.6051528832896267, "grad_norm": 21.125619888305664, "learning_rate": 7.979002624671916e-06, "loss": 1.5458, "step": 1399 }, { "epoch": 0.6055854443212847, "grad_norm": 23.79015350341797, "learning_rate": 7.970253718285215e-06, "loss": 1.4498, "step": 1400 }, { "epoch": 0.6060180053529428, "grad_norm": 22.167564392089844, "learning_rate": 7.961504811898514e-06, "loss": 1.5495, "step": 1401 }, { "epoch": 0.6064505663846008, "grad_norm": 22.303564071655273, "learning_rate": 7.952755905511812e-06, "loss": 1.5881, "step": 1402 }, { "epoch": 0.6068831274162589, "grad_norm": 24.354480743408203, "learning_rate": 7.94400699912511e-06, "loss": 1.4995, "step": 1403 }, { "epoch": 0.607315688447917, "grad_norm": 26.439773559570312, "learning_rate": 7.935258092738409e-06, "loss": 1.5314, "step": 1404 }, { "epoch": 0.607748249479575, "grad_norm": 25.109752655029297, "learning_rate": 7.926509186351706e-06, "loss": 1.4808, "step": 1405 }, { "epoch": 0.6081808105112331, "grad_norm": 21.283794403076172, "learning_rate": 7.917760279965005e-06, "loss": 1.5175, "step": 1406 }, { "epoch": 0.6086133715428911, "grad_norm": 22.0447998046875, "learning_rate": 7.909011373578303e-06, "loss": 1.4999, "step": 1407 }, { "epoch": 0.6090459325745492, "grad_norm": 21.30337905883789, "learning_rate": 7.900262467191602e-06, "loss": 1.4887, "step": 1408 }, { "epoch": 0.6094784936062072, "grad_norm": 24.14295768737793, "learning_rate": 7.8915135608049e-06, "loss": 1.4664, "step": 1409 }, { "epoch": 0.6099110546378653, "grad_norm": 22.371164321899414, "learning_rate": 7.882764654418197e-06, "loss": 1.5782, "step": 1410 }, { "epoch": 0.6103436156695233, "grad_norm": 23.566246032714844, "learning_rate": 7.874015748031496e-06, "loss": 1.519, "step": 1411 }, { "epoch": 0.6107761767011815, "grad_norm": 21.215904235839844, "learning_rate": 7.865266841644796e-06, "loss": 1.5402, "step": 1412 }, { "epoch": 0.6112087377328395, "grad_norm": 21.46411895751953, "learning_rate": 7.856517935258093e-06, "loss": 1.4525, "step": 1413 }, { "epoch": 0.6116412987644976, "grad_norm": 23.8530216217041, "learning_rate": 7.847769028871392e-06, "loss": 1.4487, "step": 1414 }, { "epoch": 0.6120738597961556, "grad_norm": 23.55060386657715, "learning_rate": 7.83902012248469e-06, "loss": 1.5548, "step": 1415 }, { "epoch": 0.6125064208278137, "grad_norm": 31.455612182617188, "learning_rate": 7.830271216097989e-06, "loss": 1.6577, "step": 1416 }, { "epoch": 0.6129389818594717, "grad_norm": 26.714149475097656, "learning_rate": 7.821522309711287e-06, "loss": 1.5606, "step": 1417 }, { "epoch": 0.6133715428911298, "grad_norm": 24.780031204223633, "learning_rate": 7.812773403324586e-06, "loss": 1.5787, "step": 1418 }, { "epoch": 0.6138041039227878, "grad_norm": 23.39690589904785, "learning_rate": 7.804024496937883e-06, "loss": 1.5447, "step": 1419 }, { "epoch": 0.614236664954446, "grad_norm": 21.379409790039062, "learning_rate": 7.79527559055118e-06, "loss": 1.4938, "step": 1420 }, { "epoch": 0.614669225986104, "grad_norm": 22.49406623840332, "learning_rate": 7.78652668416448e-06, "loss": 1.4945, "step": 1421 }, { "epoch": 0.6151017870177621, "grad_norm": 21.45618438720703, "learning_rate": 7.77777777777778e-06, "loss": 1.6223, "step": 1422 }, { "epoch": 0.6155343480494201, "grad_norm": 21.71502113342285, "learning_rate": 7.769028871391077e-06, "loss": 1.594, "step": 1423 }, { "epoch": 0.6159669090810782, "grad_norm": 23.39321517944336, "learning_rate": 7.760279965004374e-06, "loss": 1.4467, "step": 1424 }, { "epoch": 0.6163994701127362, "grad_norm": 22.757183074951172, "learning_rate": 7.751531058617673e-06, "loss": 1.478, "step": 1425 }, { "epoch": 0.6168320311443943, "grad_norm": 34.57364273071289, "learning_rate": 7.742782152230973e-06, "loss": 1.4439, "step": 1426 }, { "epoch": 0.6172645921760523, "grad_norm": 24.04800796508789, "learning_rate": 7.73403324584427e-06, "loss": 1.5368, "step": 1427 }, { "epoch": 0.6176971532077103, "grad_norm": 25.531238555908203, "learning_rate": 7.725284339457568e-06, "loss": 1.6016, "step": 1428 }, { "epoch": 0.6181297142393685, "grad_norm": 25.783323287963867, "learning_rate": 7.716535433070867e-06, "loss": 1.5348, "step": 1429 }, { "epoch": 0.6185622752710265, "grad_norm": 24.652729034423828, "learning_rate": 7.707786526684166e-06, "loss": 1.5232, "step": 1430 }, { "epoch": 0.6189948363026846, "grad_norm": 24.11018180847168, "learning_rate": 7.699037620297464e-06, "loss": 1.5591, "step": 1431 }, { "epoch": 0.6194273973343426, "grad_norm": 24.16909408569336, "learning_rate": 7.690288713910761e-06, "loss": 1.6298, "step": 1432 }, { "epoch": 0.6198599583660007, "grad_norm": 23.931461334228516, "learning_rate": 7.68153980752406e-06, "loss": 1.4956, "step": 1433 }, { "epoch": 0.6202925193976587, "grad_norm": 25.672365188598633, "learning_rate": 7.672790901137358e-06, "loss": 1.5273, "step": 1434 }, { "epoch": 0.6207250804293168, "grad_norm": 22.84345817565918, "learning_rate": 7.664041994750657e-06, "loss": 1.5427, "step": 1435 }, { "epoch": 0.6211576414609749, "grad_norm": 22.920881271362305, "learning_rate": 7.655293088363955e-06, "loss": 1.549, "step": 1436 }, { "epoch": 0.621590202492633, "grad_norm": 22.238073348999023, "learning_rate": 7.646544181977254e-06, "loss": 1.5617, "step": 1437 }, { "epoch": 0.622022763524291, "grad_norm": 21.899429321289062, "learning_rate": 7.637795275590551e-06, "loss": 1.4983, "step": 1438 }, { "epoch": 0.6224553245559491, "grad_norm": 21.77754783630371, "learning_rate": 7.62904636920385e-06, "loss": 1.5032, "step": 1439 }, { "epoch": 0.6228878855876071, "grad_norm": 24.998987197875977, "learning_rate": 7.620297462817149e-06, "loss": 1.3883, "step": 1440 }, { "epoch": 0.6233204466192652, "grad_norm": 23.322284698486328, "learning_rate": 7.611548556430447e-06, "loss": 1.5162, "step": 1441 }, { "epoch": 0.6237530076509232, "grad_norm": 21.041311264038086, "learning_rate": 7.602799650043745e-06, "loss": 1.4762, "step": 1442 }, { "epoch": 0.6241855686825813, "grad_norm": 23.653440475463867, "learning_rate": 7.594050743657043e-06, "loss": 1.531, "step": 1443 }, { "epoch": 0.6246181297142394, "grad_norm": 20.211307525634766, "learning_rate": 7.585301837270341e-06, "loss": 1.5188, "step": 1444 }, { "epoch": 0.6250506907458975, "grad_norm": 25.027973175048828, "learning_rate": 7.576552930883641e-06, "loss": 1.5473, "step": 1445 }, { "epoch": 0.6254832517775555, "grad_norm": 23.346525192260742, "learning_rate": 7.567804024496939e-06, "loss": 1.4762, "step": 1446 }, { "epoch": 0.6259158128092136, "grad_norm": 24.292959213256836, "learning_rate": 7.5590551181102365e-06, "loss": 1.5621, "step": 1447 }, { "epoch": 0.6263483738408716, "grad_norm": 23.621158599853516, "learning_rate": 7.550306211723535e-06, "loss": 1.4602, "step": 1448 }, { "epoch": 0.6267809348725296, "grad_norm": 23.259824752807617, "learning_rate": 7.541557305336834e-06, "loss": 1.5399, "step": 1449 }, { "epoch": 0.6272134959041877, "grad_norm": 24.204967498779297, "learning_rate": 7.532808398950132e-06, "loss": 1.5119, "step": 1450 }, { "epoch": 0.6276460569358457, "grad_norm": 22.07662582397461, "learning_rate": 7.52405949256343e-06, "loss": 1.5115, "step": 1451 }, { "epoch": 0.6280786179675039, "grad_norm": 23.451147079467773, "learning_rate": 7.515310586176728e-06, "loss": 1.6045, "step": 1452 }, { "epoch": 0.6285111789991619, "grad_norm": 23.66141128540039, "learning_rate": 7.506561679790027e-06, "loss": 1.6329, "step": 1453 }, { "epoch": 0.62894374003082, "grad_norm": 22.44936180114746, "learning_rate": 7.497812773403326e-06, "loss": 1.5992, "step": 1454 }, { "epoch": 0.629376301062478, "grad_norm": 22.108919143676758, "learning_rate": 7.489063867016624e-06, "loss": 1.5392, "step": 1455 }, { "epoch": 0.6298088620941361, "grad_norm": 24.86928939819336, "learning_rate": 7.480314960629922e-06, "loss": 1.5499, "step": 1456 }, { "epoch": 0.6302414231257941, "grad_norm": 21.16976547241211, "learning_rate": 7.47156605424322e-06, "loss": 1.588, "step": 1457 }, { "epoch": 0.6306739841574522, "grad_norm": 21.789356231689453, "learning_rate": 7.4628171478565184e-06, "loss": 1.5114, "step": 1458 }, { "epoch": 0.6311065451891102, "grad_norm": 23.476760864257812, "learning_rate": 7.454068241469818e-06, "loss": 1.5496, "step": 1459 }, { "epoch": 0.6315391062207684, "grad_norm": 25.469806671142578, "learning_rate": 7.445319335083115e-06, "loss": 1.6105, "step": 1460 }, { "epoch": 0.6319716672524264, "grad_norm": 25.634347915649414, "learning_rate": 7.4365704286964135e-06, "loss": 1.5353, "step": 1461 }, { "epoch": 0.6324042282840845, "grad_norm": 27.508028030395508, "learning_rate": 7.427821522309712e-06, "loss": 1.5767, "step": 1462 }, { "epoch": 0.6328367893157425, "grad_norm": 23.841432571411133, "learning_rate": 7.419072615923009e-06, "loss": 1.6054, "step": 1463 }, { "epoch": 0.6332693503474006, "grad_norm": 22.372793197631836, "learning_rate": 7.410323709536309e-06, "loss": 1.5289, "step": 1464 }, { "epoch": 0.6337019113790586, "grad_norm": 23.19795036315918, "learning_rate": 7.401574803149607e-06, "loss": 1.5856, "step": 1465 }, { "epoch": 0.6341344724107167, "grad_norm": 22.361799240112305, "learning_rate": 7.392825896762905e-06, "loss": 1.5092, "step": 1466 }, { "epoch": 0.6345670334423748, "grad_norm": 22.069835662841797, "learning_rate": 7.384076990376204e-06, "loss": 1.6036, "step": 1467 }, { "epoch": 0.6349995944740329, "grad_norm": 23.586353302001953, "learning_rate": 7.375328083989501e-06, "loss": 1.518, "step": 1468 }, { "epoch": 0.6354321555056909, "grad_norm": 22.216527938842773, "learning_rate": 7.3665791776028e-06, "loss": 1.5903, "step": 1469 }, { "epoch": 0.635864716537349, "grad_norm": 23.356054306030273, "learning_rate": 7.357830271216099e-06, "loss": 1.4951, "step": 1470 }, { "epoch": 0.636297277569007, "grad_norm": 21.026004791259766, "learning_rate": 7.349081364829397e-06, "loss": 1.618, "step": 1471 }, { "epoch": 0.636729838600665, "grad_norm": 23.06012535095215, "learning_rate": 7.340332458442695e-06, "loss": 1.5405, "step": 1472 }, { "epoch": 0.6371623996323231, "grad_norm": 23.857873916625977, "learning_rate": 7.331583552055994e-06, "loss": 1.5567, "step": 1473 }, { "epoch": 0.6375949606639811, "grad_norm": 23.048160552978516, "learning_rate": 7.322834645669292e-06, "loss": 1.5534, "step": 1474 }, { "epoch": 0.6380275216956393, "grad_norm": 24.181964874267578, "learning_rate": 7.3140857392825906e-06, "loss": 1.5192, "step": 1475 }, { "epoch": 0.6384600827272973, "grad_norm": 22.832210540771484, "learning_rate": 7.305336832895888e-06, "loss": 1.5114, "step": 1476 }, { "epoch": 0.6388926437589554, "grad_norm": 23.764835357666016, "learning_rate": 7.2965879265091864e-06, "loss": 1.4807, "step": 1477 }, { "epoch": 0.6393252047906134, "grad_norm": 24.419448852539062, "learning_rate": 7.287839020122486e-06, "loss": 1.4671, "step": 1478 }, { "epoch": 0.6397577658222715, "grad_norm": 25.90771484375, "learning_rate": 7.279090113735784e-06, "loss": 1.5565, "step": 1479 }, { "epoch": 0.6401903268539295, "grad_norm": 24.24135398864746, "learning_rate": 7.270341207349082e-06, "loss": 1.4725, "step": 1480 }, { "epoch": 0.6406228878855876, "grad_norm": 23.335277557373047, "learning_rate": 7.26159230096238e-06, "loss": 1.5215, "step": 1481 }, { "epoch": 0.6410554489172456, "grad_norm": 23.67999839782715, "learning_rate": 7.252843394575678e-06, "loss": 1.5626, "step": 1482 }, { "epoch": 0.6414880099489038, "grad_norm": 26.6694393157959, "learning_rate": 7.2440944881889774e-06, "loss": 1.4421, "step": 1483 }, { "epoch": 0.6419205709805618, "grad_norm": 23.231159210205078, "learning_rate": 7.235345581802276e-06, "loss": 1.5262, "step": 1484 }, { "epoch": 0.6423531320122199, "grad_norm": 24.250080108642578, "learning_rate": 7.226596675415573e-06, "loss": 1.5133, "step": 1485 }, { "epoch": 0.6427856930438779, "grad_norm": 21.954505920410156, "learning_rate": 7.217847769028872e-06, "loss": 1.5227, "step": 1486 }, { "epoch": 0.643218254075536, "grad_norm": 22.040283203125, "learning_rate": 7.20909886264217e-06, "loss": 1.59, "step": 1487 }, { "epoch": 0.643650815107194, "grad_norm": 23.680068969726562, "learning_rate": 7.200349956255469e-06, "loss": 1.5092, "step": 1488 }, { "epoch": 0.6440833761388521, "grad_norm": 25.096607208251953, "learning_rate": 7.191601049868768e-06, "loss": 1.468, "step": 1489 }, { "epoch": 0.6445159371705101, "grad_norm": 22.989320755004883, "learning_rate": 7.182852143482065e-06, "loss": 1.5895, "step": 1490 }, { "epoch": 0.6449484982021683, "grad_norm": 25.26317024230957, "learning_rate": 7.1741032370953635e-06, "loss": 1.4943, "step": 1491 }, { "epoch": 0.6453810592338263, "grad_norm": 25.946033477783203, "learning_rate": 7.165354330708662e-06, "loss": 1.635, "step": 1492 }, { "epoch": 0.6458136202654843, "grad_norm": 23.764036178588867, "learning_rate": 7.156605424321961e-06, "loss": 1.6082, "step": 1493 }, { "epoch": 0.6462461812971424, "grad_norm": 26.842212677001953, "learning_rate": 7.1478565179352585e-06, "loss": 1.5848, "step": 1494 }, { "epoch": 0.6466787423288004, "grad_norm": 24.90704917907715, "learning_rate": 7.139107611548557e-06, "loss": 1.5782, "step": 1495 }, { "epoch": 0.6471113033604585, "grad_norm": 26.029441833496094, "learning_rate": 7.130358705161855e-06, "loss": 1.5036, "step": 1496 }, { "epoch": 0.6475438643921165, "grad_norm": 25.079856872558594, "learning_rate": 7.1216097987751545e-06, "loss": 1.4376, "step": 1497 }, { "epoch": 0.6479764254237746, "grad_norm": 25.471174240112305, "learning_rate": 7.112860892388452e-06, "loss": 1.3823, "step": 1498 }, { "epoch": 0.6484089864554327, "grad_norm": 21.90082359313965, "learning_rate": 7.10411198600175e-06, "loss": 1.5895, "step": 1499 }, { "epoch": 0.6488415474870908, "grad_norm": 22.75554656982422, "learning_rate": 7.095363079615049e-06, "loss": 1.6006, "step": 1500 }, { "epoch": 0.6492741085187488, "grad_norm": 21.22007179260254, "learning_rate": 7.086614173228347e-06, "loss": 1.5194, "step": 1501 }, { "epoch": 0.6497066695504069, "grad_norm": 22.74795913696289, "learning_rate": 7.077865266841646e-06, "loss": 1.5761, "step": 1502 }, { "epoch": 0.6501392305820649, "grad_norm": 22.385860443115234, "learning_rate": 7.069116360454944e-06, "loss": 1.5468, "step": 1503 }, { "epoch": 0.650571791613723, "grad_norm": 23.7584228515625, "learning_rate": 7.060367454068242e-06, "loss": 1.5242, "step": 1504 }, { "epoch": 0.651004352645381, "grad_norm": 25.09479522705078, "learning_rate": 7.0516185476815405e-06, "loss": 1.5615, "step": 1505 }, { "epoch": 0.6514369136770392, "grad_norm": 24.615421295166016, "learning_rate": 7.042869641294838e-06, "loss": 1.5362, "step": 1506 }, { "epoch": 0.6518694747086972, "grad_norm": 21.421411514282227, "learning_rate": 7.034120734908137e-06, "loss": 1.5744, "step": 1507 }, { "epoch": 0.6523020357403553, "grad_norm": 23.396808624267578, "learning_rate": 7.025371828521436e-06, "loss": 1.5602, "step": 1508 }, { "epoch": 0.6527345967720133, "grad_norm": 22.569921493530273, "learning_rate": 7.016622922134734e-06, "loss": 1.5355, "step": 1509 }, { "epoch": 0.6531671578036714, "grad_norm": 24.40047264099121, "learning_rate": 7.0078740157480315e-06, "loss": 1.5158, "step": 1510 }, { "epoch": 0.6535997188353294, "grad_norm": 24.43023681640625, "learning_rate": 6.99912510936133e-06, "loss": 1.5551, "step": 1511 }, { "epoch": 0.6540322798669875, "grad_norm": 21.310428619384766, "learning_rate": 6.990376202974629e-06, "loss": 1.5114, "step": 1512 }, { "epoch": 0.6544648408986455, "grad_norm": 21.625455856323242, "learning_rate": 6.981627296587927e-06, "loss": 1.5002, "step": 1513 }, { "epoch": 0.6548974019303037, "grad_norm": 24.013227462768555, "learning_rate": 6.972878390201226e-06, "loss": 1.5115, "step": 1514 }, { "epoch": 0.6553299629619617, "grad_norm": 20.982900619506836, "learning_rate": 6.964129483814523e-06, "loss": 1.4118, "step": 1515 }, { "epoch": 0.6557625239936197, "grad_norm": 23.199033737182617, "learning_rate": 6.955380577427822e-06, "loss": 1.5651, "step": 1516 }, { "epoch": 0.6561950850252778, "grad_norm": 21.069751739501953, "learning_rate": 6.946631671041121e-06, "loss": 1.5955, "step": 1517 }, { "epoch": 0.6566276460569358, "grad_norm": 22.66398811340332, "learning_rate": 6.937882764654419e-06, "loss": 1.4432, "step": 1518 }, { "epoch": 0.6570602070885939, "grad_norm": 22.937976837158203, "learning_rate": 6.929133858267717e-06, "loss": 1.5522, "step": 1519 }, { "epoch": 0.6574927681202519, "grad_norm": 22.356679916381836, "learning_rate": 6.920384951881015e-06, "loss": 1.5067, "step": 1520 }, { "epoch": 0.65792532915191, "grad_norm": 23.95052719116211, "learning_rate": 6.911636045494314e-06, "loss": 1.5553, "step": 1521 }, { "epoch": 0.658357890183568, "grad_norm": 24.117918014526367, "learning_rate": 6.902887139107613e-06, "loss": 1.4865, "step": 1522 }, { "epoch": 0.6587904512152262, "grad_norm": 23.363407135009766, "learning_rate": 6.89413823272091e-06, "loss": 1.4588, "step": 1523 }, { "epoch": 0.6592230122468842, "grad_norm": 24.435625076293945, "learning_rate": 6.8853893263342085e-06, "loss": 1.4985, "step": 1524 }, { "epoch": 0.6596555732785423, "grad_norm": 25.14236831665039, "learning_rate": 6.876640419947507e-06, "loss": 1.5323, "step": 1525 }, { "epoch": 0.6600881343102003, "grad_norm": 22.674089431762695, "learning_rate": 6.867891513560806e-06, "loss": 1.5134, "step": 1526 }, { "epoch": 0.6605206953418584, "grad_norm": 24.138879776000977, "learning_rate": 6.859142607174104e-06, "loss": 1.5683, "step": 1527 }, { "epoch": 0.6609532563735164, "grad_norm": 24.909719467163086, "learning_rate": 6.850393700787402e-06, "loss": 1.5579, "step": 1528 }, { "epoch": 0.6613858174051745, "grad_norm": 23.420795440673828, "learning_rate": 6.8416447944007e-06, "loss": 1.4783, "step": 1529 }, { "epoch": 0.6618183784368326, "grad_norm": 22.931936264038086, "learning_rate": 6.832895888013999e-06, "loss": 1.4801, "step": 1530 }, { "epoch": 0.6622509394684907, "grad_norm": 21.55242156982422, "learning_rate": 6.824146981627298e-06, "loss": 1.4999, "step": 1531 }, { "epoch": 0.6626835005001487, "grad_norm": 21.650358200073242, "learning_rate": 6.815398075240595e-06, "loss": 1.5101, "step": 1532 }, { "epoch": 0.6631160615318068, "grad_norm": 23.8084659576416, "learning_rate": 6.806649168853894e-06, "loss": 1.4968, "step": 1533 }, { "epoch": 0.6635486225634648, "grad_norm": 25.13161849975586, "learning_rate": 6.797900262467192e-06, "loss": 1.4169, "step": 1534 }, { "epoch": 0.6639811835951229, "grad_norm": 22.81217384338379, "learning_rate": 6.78915135608049e-06, "loss": 1.4825, "step": 1535 }, { "epoch": 0.6644137446267809, "grad_norm": 25.360429763793945, "learning_rate": 6.78040244969379e-06, "loss": 1.6435, "step": 1536 }, { "epoch": 0.6648463056584389, "grad_norm": 22.05304527282715, "learning_rate": 6.771653543307087e-06, "loss": 1.5476, "step": 1537 }, { "epoch": 0.6652788666900971, "grad_norm": 24.10348892211914, "learning_rate": 6.7629046369203855e-06, "loss": 1.4382, "step": 1538 }, { "epoch": 0.6657114277217551, "grad_norm": 22.268739700317383, "learning_rate": 6.754155730533684e-06, "loss": 1.618, "step": 1539 }, { "epoch": 0.6661439887534132, "grad_norm": 21.813589096069336, "learning_rate": 6.745406824146981e-06, "loss": 1.5424, "step": 1540 }, { "epoch": 0.6665765497850712, "grad_norm": 24.29230308532715, "learning_rate": 6.736657917760281e-06, "loss": 1.5056, "step": 1541 }, { "epoch": 0.6670091108167293, "grad_norm": 21.526540756225586, "learning_rate": 6.727909011373579e-06, "loss": 1.531, "step": 1542 }, { "epoch": 0.6674416718483873, "grad_norm": 22.080156326293945, "learning_rate": 6.719160104986877e-06, "loss": 1.5371, "step": 1543 }, { "epoch": 0.6678742328800454, "grad_norm": 22.51006507873535, "learning_rate": 6.710411198600175e-06, "loss": 1.5069, "step": 1544 }, { "epoch": 0.6683067939117034, "grad_norm": 25.575986862182617, "learning_rate": 6.701662292213474e-06, "loss": 1.511, "step": 1545 }, { "epoch": 0.6687393549433616, "grad_norm": 22.93172836303711, "learning_rate": 6.692913385826772e-06, "loss": 1.501, "step": 1546 }, { "epoch": 0.6691719159750196, "grad_norm": 22.480873107910156, "learning_rate": 6.684164479440071e-06, "loss": 1.5077, "step": 1547 }, { "epoch": 0.6696044770066777, "grad_norm": 22.858264923095703, "learning_rate": 6.675415573053369e-06, "loss": 1.4774, "step": 1548 }, { "epoch": 0.6700370380383357, "grad_norm": 24.259767532348633, "learning_rate": 6.666666666666667e-06, "loss": 1.6461, "step": 1549 }, { "epoch": 0.6704695990699938, "grad_norm": 24.198890686035156, "learning_rate": 6.657917760279966e-06, "loss": 1.5393, "step": 1550 }, { "epoch": 0.6709021601016518, "grad_norm": 24.402128219604492, "learning_rate": 6.649168853893264e-06, "loss": 1.4286, "step": 1551 }, { "epoch": 0.6713347211333099, "grad_norm": 25.59145736694336, "learning_rate": 6.6404199475065626e-06, "loss": 1.4993, "step": 1552 }, { "epoch": 0.671767282164968, "grad_norm": 23.374197006225586, "learning_rate": 6.63167104111986e-06, "loss": 1.5453, "step": 1553 }, { "epoch": 0.6721998431966261, "grad_norm": 26.37010955810547, "learning_rate": 6.6229221347331584e-06, "loss": 1.5741, "step": 1554 }, { "epoch": 0.6726324042282841, "grad_norm": 25.05316162109375, "learning_rate": 6.614173228346458e-06, "loss": 1.539, "step": 1555 }, { "epoch": 0.6730649652599422, "grad_norm": 23.657917022705078, "learning_rate": 6.605424321959756e-06, "loss": 1.5029, "step": 1556 }, { "epoch": 0.6734975262916002, "grad_norm": 22.25530433654785, "learning_rate": 6.5966754155730535e-06, "loss": 1.538, "step": 1557 }, { "epoch": 0.6739300873232583, "grad_norm": 26.930683135986328, "learning_rate": 6.587926509186352e-06, "loss": 1.5289, "step": 1558 }, { "epoch": 0.6743626483549163, "grad_norm": 23.665882110595703, "learning_rate": 6.57917760279965e-06, "loss": 1.5161, "step": 1559 }, { "epoch": 0.6747952093865743, "grad_norm": 22.633747100830078, "learning_rate": 6.5704286964129495e-06, "loss": 1.4837, "step": 1560 }, { "epoch": 0.6752277704182325, "grad_norm": 27.529674530029297, "learning_rate": 6.561679790026248e-06, "loss": 1.5903, "step": 1561 }, { "epoch": 0.6756603314498905, "grad_norm": 23.555315017700195, "learning_rate": 6.552930883639545e-06, "loss": 1.5309, "step": 1562 }, { "epoch": 0.6760928924815486, "grad_norm": 22.861183166503906, "learning_rate": 6.544181977252844e-06, "loss": 1.5248, "step": 1563 }, { "epoch": 0.6765254535132066, "grad_norm": 23.239778518676758, "learning_rate": 6.535433070866142e-06, "loss": 1.4878, "step": 1564 }, { "epoch": 0.6769580145448647, "grad_norm": 24.08327293395996, "learning_rate": 6.526684164479441e-06, "loss": 1.4779, "step": 1565 }, { "epoch": 0.6773905755765227, "grad_norm": 24.711755752563477, "learning_rate": 6.517935258092739e-06, "loss": 1.6168, "step": 1566 }, { "epoch": 0.6778231366081808, "grad_norm": 26.82582664489746, "learning_rate": 6.509186351706037e-06, "loss": 1.4592, "step": 1567 }, { "epoch": 0.6782556976398388, "grad_norm": 25.213279724121094, "learning_rate": 6.5004374453193355e-06, "loss": 1.5868, "step": 1568 }, { "epoch": 0.678688258671497, "grad_norm": 22.981929779052734, "learning_rate": 6.491688538932633e-06, "loss": 1.6304, "step": 1569 }, { "epoch": 0.679120819703155, "grad_norm": 22.704452514648438, "learning_rate": 6.482939632545932e-06, "loss": 1.4529, "step": 1570 }, { "epoch": 0.6795533807348131, "grad_norm": 25.932876586914062, "learning_rate": 6.4741907261592306e-06, "loss": 1.5983, "step": 1571 }, { "epoch": 0.6799859417664711, "grad_norm": 23.21229362487793, "learning_rate": 6.465441819772529e-06, "loss": 1.4824, "step": 1572 }, { "epoch": 0.6804185027981292, "grad_norm": 23.76874351501465, "learning_rate": 6.456692913385827e-06, "loss": 1.4459, "step": 1573 }, { "epoch": 0.6808510638297872, "grad_norm": 24.482866287231445, "learning_rate": 6.4479440069991265e-06, "loss": 1.5083, "step": 1574 }, { "epoch": 0.6812836248614453, "grad_norm": 23.35637855529785, "learning_rate": 6.439195100612424e-06, "loss": 1.5173, "step": 1575 }, { "epoch": 0.6817161858931033, "grad_norm": 22.728713989257812, "learning_rate": 6.430446194225722e-06, "loss": 1.5578, "step": 1576 }, { "epoch": 0.6821487469247615, "grad_norm": 27.757644653320312, "learning_rate": 6.421697287839021e-06, "loss": 1.557, "step": 1577 }, { "epoch": 0.6825813079564195, "grad_norm": 24.904579162597656, "learning_rate": 6.412948381452318e-06, "loss": 1.4613, "step": 1578 }, { "epoch": 0.6830138689880776, "grad_norm": 24.260875701904297, "learning_rate": 6.4041994750656174e-06, "loss": 1.5102, "step": 1579 }, { "epoch": 0.6834464300197356, "grad_norm": 25.12516975402832, "learning_rate": 6.395450568678916e-06, "loss": 1.5472, "step": 1580 }, { "epoch": 0.6838789910513936, "grad_norm": 23.9464054107666, "learning_rate": 6.386701662292214e-06, "loss": 1.5611, "step": 1581 }, { "epoch": 0.6843115520830517, "grad_norm": 22.76926040649414, "learning_rate": 6.3779527559055125e-06, "loss": 1.5379, "step": 1582 }, { "epoch": 0.6847441131147097, "grad_norm": 24.24293327331543, "learning_rate": 6.36920384951881e-06, "loss": 1.5672, "step": 1583 }, { "epoch": 0.6851766741463678, "grad_norm": 24.250621795654297, "learning_rate": 6.360454943132109e-06, "loss": 1.4307, "step": 1584 }, { "epoch": 0.6856092351780259, "grad_norm": 25.41415786743164, "learning_rate": 6.351706036745408e-06, "loss": 1.5174, "step": 1585 }, { "epoch": 0.686041796209684, "grad_norm": 21.934829711914062, "learning_rate": 6.342957130358706e-06, "loss": 1.5329, "step": 1586 }, { "epoch": 0.686474357241342, "grad_norm": 23.609012603759766, "learning_rate": 6.3342082239720035e-06, "loss": 1.5426, "step": 1587 }, { "epoch": 0.6869069182730001, "grad_norm": 25.188135147094727, "learning_rate": 6.325459317585302e-06, "loss": 1.5221, "step": 1588 }, { "epoch": 0.6873394793046581, "grad_norm": 23.130638122558594, "learning_rate": 6.316710411198601e-06, "loss": 1.5202, "step": 1589 }, { "epoch": 0.6877720403363162, "grad_norm": 24.72777557373047, "learning_rate": 6.307961504811899e-06, "loss": 1.5004, "step": 1590 }, { "epoch": 0.6882046013679742, "grad_norm": 23.27838134765625, "learning_rate": 6.299212598425197e-06, "loss": 1.5503, "step": 1591 }, { "epoch": 0.6886371623996324, "grad_norm": 20.87567710876465, "learning_rate": 6.290463692038495e-06, "loss": 1.5856, "step": 1592 }, { "epoch": 0.6890697234312904, "grad_norm": 23.302906036376953, "learning_rate": 6.281714785651794e-06, "loss": 1.524, "step": 1593 }, { "epoch": 0.6895022844629485, "grad_norm": 24.006710052490234, "learning_rate": 6.272965879265093e-06, "loss": 1.4691, "step": 1594 }, { "epoch": 0.6899348454946065, "grad_norm": 24.12818145751953, "learning_rate": 6.264216972878391e-06, "loss": 1.5727, "step": 1595 }, { "epoch": 0.6903674065262646, "grad_norm": 25.554643630981445, "learning_rate": 6.255468066491689e-06, "loss": 1.5754, "step": 1596 }, { "epoch": 0.6907999675579226, "grad_norm": 21.56632423400879, "learning_rate": 6.246719160104987e-06, "loss": 1.5766, "step": 1597 }, { "epoch": 0.6912325285895807, "grad_norm": 26.7813720703125, "learning_rate": 6.237970253718286e-06, "loss": 1.5778, "step": 1598 }, { "epoch": 0.6916650896212387, "grad_norm": 24.2366943359375, "learning_rate": 6.229221347331585e-06, "loss": 1.5162, "step": 1599 }, { "epoch": 0.6920976506528969, "grad_norm": 24.078445434570312, "learning_rate": 6.220472440944882e-06, "loss": 1.4297, "step": 1600 }, { "epoch": 0.6925302116845549, "grad_norm": 26.070178985595703, "learning_rate": 6.2117235345581805e-06, "loss": 1.5393, "step": 1601 }, { "epoch": 0.692962772716213, "grad_norm": 27.011350631713867, "learning_rate": 6.202974628171479e-06, "loss": 1.4795, "step": 1602 }, { "epoch": 0.693395333747871, "grad_norm": 27.4875431060791, "learning_rate": 6.194225721784778e-06, "loss": 1.5485, "step": 1603 }, { "epoch": 0.693827894779529, "grad_norm": 22.1238956451416, "learning_rate": 6.185476815398076e-06, "loss": 1.428, "step": 1604 }, { "epoch": 0.6942604558111871, "grad_norm": 23.28427505493164, "learning_rate": 6.176727909011374e-06, "loss": 1.6064, "step": 1605 }, { "epoch": 0.6946930168428451, "grad_norm": 24.525197982788086, "learning_rate": 6.167979002624672e-06, "loss": 1.5426, "step": 1606 }, { "epoch": 0.6951255778745032, "grad_norm": 26.526762008666992, "learning_rate": 6.159230096237971e-06, "loss": 1.4851, "step": 1607 }, { "epoch": 0.6955581389061612, "grad_norm": 23.86712646484375, "learning_rate": 6.15048118985127e-06, "loss": 1.5635, "step": 1608 }, { "epoch": 0.6959906999378194, "grad_norm": 22.40746307373047, "learning_rate": 6.141732283464567e-06, "loss": 1.5988, "step": 1609 }, { "epoch": 0.6964232609694774, "grad_norm": 22.69624137878418, "learning_rate": 6.132983377077866e-06, "loss": 1.444, "step": 1610 }, { "epoch": 0.6968558220011355, "grad_norm": 22.52001953125, "learning_rate": 6.124234470691164e-06, "loss": 1.4884, "step": 1611 }, { "epoch": 0.6972883830327935, "grad_norm": 25.607025146484375, "learning_rate": 6.115485564304462e-06, "loss": 1.4535, "step": 1612 }, { "epoch": 0.6977209440644516, "grad_norm": 22.519060134887695, "learning_rate": 6.106736657917761e-06, "loss": 1.6537, "step": 1613 }, { "epoch": 0.6981535050961096, "grad_norm": 22.56156349182129, "learning_rate": 6.097987751531059e-06, "loss": 1.5571, "step": 1614 }, { "epoch": 0.6985860661277677, "grad_norm": 22.961685180664062, "learning_rate": 6.0892388451443576e-06, "loss": 1.5163, "step": 1615 }, { "epoch": 0.6990186271594258, "grad_norm": 24.06474494934082, "learning_rate": 6.080489938757655e-06, "loss": 1.5118, "step": 1616 }, { "epoch": 0.6994511881910839, "grad_norm": 23.049427032470703, "learning_rate": 6.0717410323709534e-06, "loss": 1.5018, "step": 1617 }, { "epoch": 0.6998837492227419, "grad_norm": 25.137136459350586, "learning_rate": 6.062992125984253e-06, "loss": 1.5839, "step": 1618 }, { "epoch": 0.7003163102544, "grad_norm": 23.529890060424805, "learning_rate": 6.054243219597551e-06, "loss": 1.5558, "step": 1619 }, { "epoch": 0.700748871286058, "grad_norm": 22.19614601135254, "learning_rate": 6.045494313210849e-06, "loss": 1.5381, "step": 1620 }, { "epoch": 0.7011814323177161, "grad_norm": 26.739168167114258, "learning_rate": 6.036745406824147e-06, "loss": 1.5358, "step": 1621 }, { "epoch": 0.7016139933493741, "grad_norm": 25.52427864074707, "learning_rate": 6.027996500437446e-06, "loss": 1.5421, "step": 1622 }, { "epoch": 0.7020465543810323, "grad_norm": 23.286373138427734, "learning_rate": 6.0192475940507444e-06, "loss": 1.4983, "step": 1623 }, { "epoch": 0.7024791154126903, "grad_norm": 25.0222225189209, "learning_rate": 6.010498687664043e-06, "loss": 1.4992, "step": 1624 }, { "epoch": 0.7029116764443483, "grad_norm": 26.569257736206055, "learning_rate": 6.00174978127734e-06, "loss": 1.5621, "step": 1625 }, { "epoch": 0.7033442374760064, "grad_norm": 23.545291900634766, "learning_rate": 5.993000874890639e-06, "loss": 1.5807, "step": 1626 }, { "epoch": 0.7037767985076644, "grad_norm": 25.80244255065918, "learning_rate": 5.984251968503938e-06, "loss": 1.5377, "step": 1627 }, { "epoch": 0.7042093595393225, "grad_norm": 24.305273056030273, "learning_rate": 5.975503062117236e-06, "loss": 1.5414, "step": 1628 }, { "epoch": 0.7046419205709805, "grad_norm": 24.8024845123291, "learning_rate": 5.966754155730535e-06, "loss": 1.5594, "step": 1629 }, { "epoch": 0.7050744816026386, "grad_norm": 22.716646194458008, "learning_rate": 5.958005249343832e-06, "loss": 1.5006, "step": 1630 }, { "epoch": 0.7055070426342966, "grad_norm": 25.36906623840332, "learning_rate": 5.9492563429571305e-06, "loss": 1.5169, "step": 1631 }, { "epoch": 0.7059396036659548, "grad_norm": 21.435344696044922, "learning_rate": 5.94050743657043e-06, "loss": 1.4967, "step": 1632 }, { "epoch": 0.7063721646976128, "grad_norm": 25.601346969604492, "learning_rate": 5.931758530183728e-06, "loss": 1.4927, "step": 1633 }, { "epoch": 0.7068047257292709, "grad_norm": 23.794708251953125, "learning_rate": 5.9230096237970256e-06, "loss": 1.4312, "step": 1634 }, { "epoch": 0.7072372867609289, "grad_norm": 26.20401954650879, "learning_rate": 5.914260717410324e-06, "loss": 1.488, "step": 1635 }, { "epoch": 0.707669847792587, "grad_norm": 23.398365020751953, "learning_rate": 5.905511811023622e-06, "loss": 1.4526, "step": 1636 }, { "epoch": 0.708102408824245, "grad_norm": 24.44811248779297, "learning_rate": 5.8967629046369215e-06, "loss": 1.5993, "step": 1637 }, { "epoch": 0.7085349698559031, "grad_norm": 24.23762321472168, "learning_rate": 5.888013998250219e-06, "loss": 1.5917, "step": 1638 }, { "epoch": 0.7089675308875611, "grad_norm": 27.343517303466797, "learning_rate": 5.879265091863517e-06, "loss": 1.4431, "step": 1639 }, { "epoch": 0.7094000919192193, "grad_norm": 26.040502548217773, "learning_rate": 5.870516185476816e-06, "loss": 1.5874, "step": 1640 }, { "epoch": 0.7098326529508773, "grad_norm": 25.721101760864258, "learning_rate": 5.861767279090114e-06, "loss": 1.5325, "step": 1641 }, { "epoch": 0.7102652139825354, "grad_norm": 24.612388610839844, "learning_rate": 5.853018372703413e-06, "loss": 1.4986, "step": 1642 }, { "epoch": 0.7106977750141934, "grad_norm": 24.61174964904785, "learning_rate": 5.844269466316711e-06, "loss": 1.5429, "step": 1643 }, { "epoch": 0.7111303360458515, "grad_norm": 31.583274841308594, "learning_rate": 5.835520559930009e-06, "loss": 1.5787, "step": 1644 }, { "epoch": 0.7115628970775095, "grad_norm": 23.573389053344727, "learning_rate": 5.8267716535433075e-06, "loss": 1.5006, "step": 1645 }, { "epoch": 0.7119954581091676, "grad_norm": 28.629196166992188, "learning_rate": 5.818022747156607e-06, "loss": 1.5066, "step": 1646 }, { "epoch": 0.7124280191408257, "grad_norm": 24.07172966003418, "learning_rate": 5.809273840769904e-06, "loss": 1.5167, "step": 1647 }, { "epoch": 0.7128605801724837, "grad_norm": 24.85234260559082, "learning_rate": 5.800524934383203e-06, "loss": 1.4958, "step": 1648 }, { "epoch": 0.7132931412041418, "grad_norm": 24.3563232421875, "learning_rate": 5.791776027996501e-06, "loss": 1.5272, "step": 1649 }, { "epoch": 0.7137257022357998, "grad_norm": 24.525150299072266, "learning_rate": 5.7830271216097985e-06, "loss": 1.4952, "step": 1650 }, { "epoch": 0.7141582632674579, "grad_norm": 26.782798767089844, "learning_rate": 5.774278215223098e-06, "loss": 1.5519, "step": 1651 }, { "epoch": 0.7145908242991159, "grad_norm": 22.02875518798828, "learning_rate": 5.765529308836396e-06, "loss": 1.5515, "step": 1652 }, { "epoch": 0.715023385330774, "grad_norm": 24.18636703491211, "learning_rate": 5.756780402449694e-06, "loss": 1.6241, "step": 1653 }, { "epoch": 0.715455946362432, "grad_norm": 20.57954216003418, "learning_rate": 5.748031496062993e-06, "loss": 1.4652, "step": 1654 }, { "epoch": 0.7158885073940902, "grad_norm": 24.09521484375, "learning_rate": 5.73928258967629e-06, "loss": 1.3606, "step": 1655 }, { "epoch": 0.7163210684257482, "grad_norm": 26.569869995117188, "learning_rate": 5.7305336832895895e-06, "loss": 1.5271, "step": 1656 }, { "epoch": 0.7167536294574063, "grad_norm": 24.32697296142578, "learning_rate": 5.721784776902888e-06, "loss": 1.5423, "step": 1657 }, { "epoch": 0.7171861904890643, "grad_norm": 24.233909606933594, "learning_rate": 5.713035870516186e-06, "loss": 1.6511, "step": 1658 }, { "epoch": 0.7176187515207224, "grad_norm": 26.0461483001709, "learning_rate": 5.704286964129484e-06, "loss": 1.4889, "step": 1659 }, { "epoch": 0.7180513125523804, "grad_norm": 24.910232543945312, "learning_rate": 5.695538057742782e-06, "loss": 1.5006, "step": 1660 }, { "epoch": 0.7184838735840385, "grad_norm": 25.941944122314453, "learning_rate": 5.686789151356081e-06, "loss": 1.4548, "step": 1661 }, { "epoch": 0.7189164346156965, "grad_norm": 21.430110931396484, "learning_rate": 5.67804024496938e-06, "loss": 1.4481, "step": 1662 }, { "epoch": 0.7193489956473547, "grad_norm": 27.890432357788086, "learning_rate": 5.669291338582677e-06, "loss": 1.4916, "step": 1663 }, { "epoch": 0.7197815566790127, "grad_norm": 25.04297637939453, "learning_rate": 5.6605424321959755e-06, "loss": 1.4469, "step": 1664 }, { "epoch": 0.7202141177106708, "grad_norm": 24.669153213500977, "learning_rate": 5.651793525809274e-06, "loss": 1.4939, "step": 1665 }, { "epoch": 0.7206466787423288, "grad_norm": 27.076704025268555, "learning_rate": 5.643044619422573e-06, "loss": 1.569, "step": 1666 }, { "epoch": 0.7210792397739869, "grad_norm": 22.596973419189453, "learning_rate": 5.6342957130358714e-06, "loss": 1.5492, "step": 1667 }, { "epoch": 0.7215118008056449, "grad_norm": 25.775602340698242, "learning_rate": 5.625546806649169e-06, "loss": 1.6141, "step": 1668 }, { "epoch": 0.7219443618373029, "grad_norm": 23.130680084228516, "learning_rate": 5.616797900262467e-06, "loss": 1.5137, "step": 1669 }, { "epoch": 0.722376922868961, "grad_norm": 32.87105941772461, "learning_rate": 5.6080489938757665e-06, "loss": 1.5107, "step": 1670 }, { "epoch": 0.722809483900619, "grad_norm": 22.43234634399414, "learning_rate": 5.599300087489065e-06, "loss": 1.5348, "step": 1671 }, { "epoch": 0.7232420449322772, "grad_norm": 24.693084716796875, "learning_rate": 5.590551181102362e-06, "loss": 1.5212, "step": 1672 }, { "epoch": 0.7236746059639352, "grad_norm": 27.20313835144043, "learning_rate": 5.581802274715661e-06, "loss": 1.5393, "step": 1673 }, { "epoch": 0.7241071669955933, "grad_norm": 23.293119430541992, "learning_rate": 5.573053368328959e-06, "loss": 1.4915, "step": 1674 }, { "epoch": 0.7245397280272513, "grad_norm": 26.227306365966797, "learning_rate": 5.564304461942258e-06, "loss": 1.513, "step": 1675 }, { "epoch": 0.7249722890589094, "grad_norm": 24.33916473388672, "learning_rate": 5.555555555555557e-06, "loss": 1.4589, "step": 1676 }, { "epoch": 0.7254048500905674, "grad_norm": 24.176958084106445, "learning_rate": 5.546806649168854e-06, "loss": 1.5841, "step": 1677 }, { "epoch": 0.7258374111222256, "grad_norm": 22.859975814819336, "learning_rate": 5.5380577427821525e-06, "loss": 1.5555, "step": 1678 }, { "epoch": 0.7262699721538836, "grad_norm": 24.679033279418945, "learning_rate": 5.529308836395451e-06, "loss": 1.544, "step": 1679 }, { "epoch": 0.7267025331855417, "grad_norm": 23.848819732666016, "learning_rate": 5.52055993000875e-06, "loss": 1.4953, "step": 1680 }, { "epoch": 0.7271350942171997, "grad_norm": 23.183744430541992, "learning_rate": 5.511811023622048e-06, "loss": 1.55, "step": 1681 }, { "epoch": 0.7275676552488578, "grad_norm": 25.85765838623047, "learning_rate": 5.503062117235346e-06, "loss": 1.543, "step": 1682 }, { "epoch": 0.7280002162805158, "grad_norm": 25.841501235961914, "learning_rate": 5.494313210848644e-06, "loss": 1.5411, "step": 1683 }, { "epoch": 0.7284327773121739, "grad_norm": 25.476608276367188, "learning_rate": 5.485564304461942e-06, "loss": 1.502, "step": 1684 }, { "epoch": 0.7288653383438319, "grad_norm": 24.201326370239258, "learning_rate": 5.476815398075241e-06, "loss": 1.5492, "step": 1685 }, { "epoch": 0.72929789937549, "grad_norm": 26.180383682250977, "learning_rate": 5.468066491688539e-06, "loss": 1.5663, "step": 1686 }, { "epoch": 0.7297304604071481, "grad_norm": 23.471723556518555, "learning_rate": 5.459317585301838e-06, "loss": 1.4523, "step": 1687 }, { "epoch": 0.7301630214388062, "grad_norm": 28.88964080810547, "learning_rate": 5.450568678915136e-06, "loss": 1.5238, "step": 1688 }, { "epoch": 0.7305955824704642, "grad_norm": 24.187883377075195, "learning_rate": 5.441819772528434e-06, "loss": 1.6256, "step": 1689 }, { "epoch": 0.7310281435021223, "grad_norm": 23.76744270324707, "learning_rate": 5.433070866141733e-06, "loss": 1.5351, "step": 1690 }, { "epoch": 0.7314607045337803, "grad_norm": 25.365325927734375, "learning_rate": 5.424321959755031e-06, "loss": 1.4795, "step": 1691 }, { "epoch": 0.7318932655654383, "grad_norm": 27.68093490600586, "learning_rate": 5.41557305336833e-06, "loss": 1.6068, "step": 1692 }, { "epoch": 0.7323258265970964, "grad_norm": 26.40992546081543, "learning_rate": 5.406824146981627e-06, "loss": 1.497, "step": 1693 }, { "epoch": 0.7327583876287544, "grad_norm": 26.072214126586914, "learning_rate": 5.398075240594926e-06, "loss": 1.5387, "step": 1694 }, { "epoch": 0.7331909486604126, "grad_norm": 22.766809463500977, "learning_rate": 5.389326334208225e-06, "loss": 1.6233, "step": 1695 }, { "epoch": 0.7336235096920706, "grad_norm": 24.374536514282227, "learning_rate": 5.380577427821523e-06, "loss": 1.5207, "step": 1696 }, { "epoch": 0.7340560707237287, "grad_norm": 26.02762794494629, "learning_rate": 5.3718285214348205e-06, "loss": 1.5178, "step": 1697 }, { "epoch": 0.7344886317553867, "grad_norm": 24.251422882080078, "learning_rate": 5.363079615048119e-06, "loss": 1.5194, "step": 1698 }, { "epoch": 0.7349211927870448, "grad_norm": 28.283960342407227, "learning_rate": 5.354330708661418e-06, "loss": 1.5407, "step": 1699 }, { "epoch": 0.7353537538187028, "grad_norm": 24.403696060180664, "learning_rate": 5.3455818022747165e-06, "loss": 1.5313, "step": 1700 }, { "epoch": 0.735786314850361, "grad_norm": 25.62765884399414, "learning_rate": 5.336832895888015e-06, "loss": 1.5413, "step": 1701 }, { "epoch": 0.736218875882019, "grad_norm": 26.185192108154297, "learning_rate": 5.328083989501312e-06, "loss": 1.4735, "step": 1702 }, { "epoch": 0.7366514369136771, "grad_norm": 22.629499435424805, "learning_rate": 5.319335083114611e-06, "loss": 1.4404, "step": 1703 }, { "epoch": 0.7370839979453351, "grad_norm": 21.76143455505371, "learning_rate": 5.31058617672791e-06, "loss": 1.5389, "step": 1704 }, { "epoch": 0.7375165589769932, "grad_norm": 24.350290298461914, "learning_rate": 5.301837270341208e-06, "loss": 1.5272, "step": 1705 }, { "epoch": 0.7379491200086512, "grad_norm": 25.934980392456055, "learning_rate": 5.293088363954506e-06, "loss": 1.5434, "step": 1706 }, { "epoch": 0.7383816810403093, "grad_norm": 26.150161743164062, "learning_rate": 5.284339457567804e-06, "loss": 1.4915, "step": 1707 }, { "epoch": 0.7388142420719673, "grad_norm": 26.145160675048828, "learning_rate": 5.2755905511811025e-06, "loss": 1.4885, "step": 1708 }, { "epoch": 0.7392468031036254, "grad_norm": 27.158536911010742, "learning_rate": 5.266841644794402e-06, "loss": 1.4621, "step": 1709 }, { "epoch": 0.7396793641352835, "grad_norm": 27.394515991210938, "learning_rate": 5.258092738407699e-06, "loss": 1.5549, "step": 1710 }, { "epoch": 0.7401119251669416, "grad_norm": 24.41299819946289, "learning_rate": 5.2493438320209976e-06, "loss": 1.4812, "step": 1711 }, { "epoch": 0.7405444861985996, "grad_norm": 26.23542022705078, "learning_rate": 5.240594925634296e-06, "loss": 1.452, "step": 1712 }, { "epoch": 0.7409770472302576, "grad_norm": 23.603107452392578, "learning_rate": 5.231846019247594e-06, "loss": 1.5045, "step": 1713 }, { "epoch": 0.7414096082619157, "grad_norm": 25.918743133544922, "learning_rate": 5.2230971128608935e-06, "loss": 1.5866, "step": 1714 }, { "epoch": 0.7418421692935737, "grad_norm": 23.324562072753906, "learning_rate": 5.214348206474191e-06, "loss": 1.4572, "step": 1715 }, { "epoch": 0.7422747303252318, "grad_norm": 23.02033233642578, "learning_rate": 5.205599300087489e-06, "loss": 1.4901, "step": 1716 }, { "epoch": 0.7427072913568898, "grad_norm": 25.430767059326172, "learning_rate": 5.196850393700788e-06, "loss": 1.4732, "step": 1717 }, { "epoch": 0.743139852388548, "grad_norm": 25.643844604492188, "learning_rate": 5.188101487314087e-06, "loss": 1.5468, "step": 1718 }, { "epoch": 0.743572413420206, "grad_norm": 22.182497024536133, "learning_rate": 5.1793525809273845e-06, "loss": 1.5145, "step": 1719 }, { "epoch": 0.7440049744518641, "grad_norm": 25.15062713623047, "learning_rate": 5.170603674540683e-06, "loss": 1.5308, "step": 1720 }, { "epoch": 0.7444375354835221, "grad_norm": 27.65828514099121, "learning_rate": 5.161854768153981e-06, "loss": 1.5002, "step": 1721 }, { "epoch": 0.7448700965151802, "grad_norm": 23.610389709472656, "learning_rate": 5.1531058617672795e-06, "loss": 1.593, "step": 1722 }, { "epoch": 0.7453026575468382, "grad_norm": 22.645122528076172, "learning_rate": 5.144356955380579e-06, "loss": 1.5042, "step": 1723 }, { "epoch": 0.7457352185784963, "grad_norm": 26.383073806762695, "learning_rate": 5.135608048993876e-06, "loss": 1.5823, "step": 1724 }, { "epoch": 0.7461677796101543, "grad_norm": 24.08680534362793, "learning_rate": 5.126859142607175e-06, "loss": 1.4827, "step": 1725 }, { "epoch": 0.7466003406418125, "grad_norm": 28.29230308532715, "learning_rate": 5.118110236220473e-06, "loss": 1.5249, "step": 1726 }, { "epoch": 0.7470329016734705, "grad_norm": 24.57362937927246, "learning_rate": 5.1093613298337705e-06, "loss": 1.4491, "step": 1727 }, { "epoch": 0.7474654627051286, "grad_norm": 23.962614059448242, "learning_rate": 5.10061242344707e-06, "loss": 1.5894, "step": 1728 }, { "epoch": 0.7478980237367866, "grad_norm": 23.7990779876709, "learning_rate": 5.091863517060368e-06, "loss": 1.5585, "step": 1729 }, { "epoch": 0.7483305847684447, "grad_norm": 24.12856674194336, "learning_rate": 5.083114610673666e-06, "loss": 1.5773, "step": 1730 }, { "epoch": 0.7487631458001027, "grad_norm": 23.70834732055664, "learning_rate": 5.074365704286964e-06, "loss": 1.4753, "step": 1731 }, { "epoch": 0.7491957068317608, "grad_norm": 23.08877182006836, "learning_rate": 5.065616797900262e-06, "loss": 1.6544, "step": 1732 }, { "epoch": 0.7496282678634189, "grad_norm": 26.548851013183594, "learning_rate": 5.0568678915135615e-06, "loss": 1.5633, "step": 1733 }, { "epoch": 0.750060828895077, "grad_norm": 24.939672470092773, "learning_rate": 5.04811898512686e-06, "loss": 1.5656, "step": 1734 }, { "epoch": 0.750493389926735, "grad_norm": 23.20163345336914, "learning_rate": 5.039370078740158e-06, "loss": 1.5713, "step": 1735 }, { "epoch": 0.750925950958393, "grad_norm": 22.919689178466797, "learning_rate": 5.030621172353456e-06, "loss": 1.6136, "step": 1736 }, { "epoch": 0.7513585119900511, "grad_norm": 24.276479721069336, "learning_rate": 5.021872265966754e-06, "loss": 1.5749, "step": 1737 }, { "epoch": 0.7517910730217091, "grad_norm": 23.846363067626953, "learning_rate": 5.013123359580053e-06, "loss": 1.4735, "step": 1738 }, { "epoch": 0.7522236340533672, "grad_norm": 25.617586135864258, "learning_rate": 5.004374453193352e-06, "loss": 1.5511, "step": 1739 }, { "epoch": 0.7526561950850252, "grad_norm": 22.6473445892334, "learning_rate": 4.995625546806649e-06, "loss": 1.504, "step": 1740 }, { "epoch": 0.7530887561166834, "grad_norm": 24.413610458374023, "learning_rate": 4.986876640419948e-06, "loss": 1.5588, "step": 1741 }, { "epoch": 0.7535213171483414, "grad_norm": 24.66851806640625, "learning_rate": 4.978127734033246e-06, "loss": 1.5151, "step": 1742 }, { "epoch": 0.7539538781799995, "grad_norm": 23.771665573120117, "learning_rate": 4.969378827646544e-06, "loss": 1.4847, "step": 1743 }, { "epoch": 0.7543864392116575, "grad_norm": 24.447561264038086, "learning_rate": 4.960629921259843e-06, "loss": 1.55, "step": 1744 }, { "epoch": 0.7548190002433156, "grad_norm": 22.75802230834961, "learning_rate": 4.951881014873141e-06, "loss": 1.5186, "step": 1745 }, { "epoch": 0.7552515612749736, "grad_norm": 26.083370208740234, "learning_rate": 4.94313210848644e-06, "loss": 1.4842, "step": 1746 }, { "epoch": 0.7556841223066317, "grad_norm": 29.3764705657959, "learning_rate": 4.934383202099738e-06, "loss": 1.4959, "step": 1747 }, { "epoch": 0.7561166833382897, "grad_norm": 21.326786041259766, "learning_rate": 4.925634295713037e-06, "loss": 1.5026, "step": 1748 }, { "epoch": 0.7565492443699479, "grad_norm": 23.675289154052734, "learning_rate": 4.916885389326334e-06, "loss": 1.5059, "step": 1749 }, { "epoch": 0.7569818054016059, "grad_norm": 25.60411834716797, "learning_rate": 4.908136482939633e-06, "loss": 1.5985, "step": 1750 }, { "epoch": 0.757414366433264, "grad_norm": 24.007932662963867, "learning_rate": 4.899387576552931e-06, "loss": 1.4974, "step": 1751 }, { "epoch": 0.757846927464922, "grad_norm": 28.738752365112305, "learning_rate": 4.8906386701662295e-06, "loss": 1.4758, "step": 1752 }, { "epoch": 0.7582794884965801, "grad_norm": 24.201730728149414, "learning_rate": 4.881889763779528e-06, "loss": 1.5549, "step": 1753 }, { "epoch": 0.7587120495282381, "grad_norm": 24.409114837646484, "learning_rate": 4.873140857392826e-06, "loss": 1.5193, "step": 1754 }, { "epoch": 0.7591446105598962, "grad_norm": 22.474170684814453, "learning_rate": 4.8643919510061246e-06, "loss": 1.6365, "step": 1755 }, { "epoch": 0.7595771715915542, "grad_norm": 22.524370193481445, "learning_rate": 4.855643044619423e-06, "loss": 1.6074, "step": 1756 }, { "epoch": 0.7600097326232123, "grad_norm": 26.538591384887695, "learning_rate": 4.846894138232721e-06, "loss": 1.5673, "step": 1757 }, { "epoch": 0.7604422936548704, "grad_norm": 25.404539108276367, "learning_rate": 4.83814523184602e-06, "loss": 1.5393, "step": 1758 }, { "epoch": 0.7608748546865284, "grad_norm": 23.63199806213379, "learning_rate": 4.829396325459318e-06, "loss": 1.5652, "step": 1759 }, { "epoch": 0.7613074157181865, "grad_norm": 26.102270126342773, "learning_rate": 4.820647419072616e-06, "loss": 1.5968, "step": 1760 }, { "epoch": 0.7617399767498445, "grad_norm": 27.475393295288086, "learning_rate": 4.811898512685915e-06, "loss": 1.5657, "step": 1761 }, { "epoch": 0.7621725377815026, "grad_norm": 24.71074676513672, "learning_rate": 4.803149606299213e-06, "loss": 1.5442, "step": 1762 }, { "epoch": 0.7626050988131606, "grad_norm": 25.49890899658203, "learning_rate": 4.7944006999125114e-06, "loss": 1.4888, "step": 1763 }, { "epoch": 0.7630376598448187, "grad_norm": 23.629859924316406, "learning_rate": 4.78565179352581e-06, "loss": 1.5163, "step": 1764 }, { "epoch": 0.7634702208764768, "grad_norm": 24.619083404541016, "learning_rate": 4.776902887139108e-06, "loss": 1.5097, "step": 1765 }, { "epoch": 0.7639027819081349, "grad_norm": 21.8817138671875, "learning_rate": 4.7681539807524065e-06, "loss": 1.5555, "step": 1766 }, { "epoch": 0.7643353429397929, "grad_norm": 23.76637840270996, "learning_rate": 4.759405074365704e-06, "loss": 1.4962, "step": 1767 }, { "epoch": 0.764767903971451, "grad_norm": 24.75080108642578, "learning_rate": 4.750656167979003e-06, "loss": 1.5414, "step": 1768 }, { "epoch": 0.765200465003109, "grad_norm": 24.92739486694336, "learning_rate": 4.741907261592302e-06, "loss": 1.5096, "step": 1769 }, { "epoch": 0.7656330260347671, "grad_norm": 24.84958839416504, "learning_rate": 4.7331583552056e-06, "loss": 1.5318, "step": 1770 }, { "epoch": 0.7660655870664251, "grad_norm": 24.568798065185547, "learning_rate": 4.724409448818898e-06, "loss": 1.5618, "step": 1771 }, { "epoch": 0.7664981480980833, "grad_norm": 23.898578643798828, "learning_rate": 4.715660542432197e-06, "loss": 1.58, "step": 1772 }, { "epoch": 0.7669307091297413, "grad_norm": 24.494558334350586, "learning_rate": 4.706911636045495e-06, "loss": 1.4997, "step": 1773 }, { "epoch": 0.7673632701613994, "grad_norm": 21.843421936035156, "learning_rate": 4.6981627296587926e-06, "loss": 1.5825, "step": 1774 }, { "epoch": 0.7677958311930574, "grad_norm": 25.82838249206543, "learning_rate": 4.689413823272092e-06, "loss": 1.5319, "step": 1775 }, { "epoch": 0.7682283922247155, "grad_norm": 24.003957748413086, "learning_rate": 4.680664916885389e-06, "loss": 1.4995, "step": 1776 }, { "epoch": 0.7686609532563735, "grad_norm": 24.655353546142578, "learning_rate": 4.6719160104986885e-06, "loss": 1.456, "step": 1777 }, { "epoch": 0.7690935142880316, "grad_norm": 24.714584350585938, "learning_rate": 4.663167104111986e-06, "loss": 1.4928, "step": 1778 }, { "epoch": 0.7695260753196896, "grad_norm": 26.724472045898438, "learning_rate": 4.654418197725284e-06, "loss": 1.5625, "step": 1779 }, { "epoch": 0.7699586363513476, "grad_norm": 24.749422073364258, "learning_rate": 4.645669291338583e-06, "loss": 1.5352, "step": 1780 }, { "epoch": 0.7703911973830058, "grad_norm": 25.769168853759766, "learning_rate": 4.636920384951881e-06, "loss": 1.525, "step": 1781 }, { "epoch": 0.7708237584146638, "grad_norm": 27.238039016723633, "learning_rate": 4.62817147856518e-06, "loss": 1.4711, "step": 1782 }, { "epoch": 0.7712563194463219, "grad_norm": 28.644994735717773, "learning_rate": 4.619422572178478e-06, "loss": 1.5006, "step": 1783 }, { "epoch": 0.7716888804779799, "grad_norm": 27.155855178833008, "learning_rate": 4.610673665791777e-06, "loss": 1.5224, "step": 1784 }, { "epoch": 0.772121441509638, "grad_norm": 24.1883487701416, "learning_rate": 4.6019247594050745e-06, "loss": 1.5639, "step": 1785 }, { "epoch": 0.772554002541296, "grad_norm": 23.588848114013672, "learning_rate": 4.593175853018373e-06, "loss": 1.4729, "step": 1786 }, { "epoch": 0.7729865635729541, "grad_norm": 23.275426864624023, "learning_rate": 4.584426946631671e-06, "loss": 1.5298, "step": 1787 }, { "epoch": 0.7734191246046122, "grad_norm": 24.45703887939453, "learning_rate": 4.57567804024497e-06, "loss": 1.5589, "step": 1788 }, { "epoch": 0.7738516856362703, "grad_norm": 25.52694320678711, "learning_rate": 4.566929133858268e-06, "loss": 1.4867, "step": 1789 }, { "epoch": 0.7742842466679283, "grad_norm": 24.09499740600586, "learning_rate": 4.558180227471566e-06, "loss": 1.5231, "step": 1790 }, { "epoch": 0.7747168076995864, "grad_norm": 24.523792266845703, "learning_rate": 4.549431321084865e-06, "loss": 1.5179, "step": 1791 }, { "epoch": 0.7751493687312444, "grad_norm": 23.561460494995117, "learning_rate": 4.540682414698163e-06, "loss": 1.5205, "step": 1792 }, { "epoch": 0.7755819297629025, "grad_norm": 26.378007888793945, "learning_rate": 4.531933508311461e-06, "loss": 1.5417, "step": 1793 }, { "epoch": 0.7760144907945605, "grad_norm": 23.561283111572266, "learning_rate": 4.52318460192476e-06, "loss": 1.589, "step": 1794 }, { "epoch": 0.7764470518262186, "grad_norm": 27.071557998657227, "learning_rate": 4.514435695538058e-06, "loss": 1.4608, "step": 1795 }, { "epoch": 0.7768796128578767, "grad_norm": 26.57014274597168, "learning_rate": 4.5056867891513565e-06, "loss": 1.4033, "step": 1796 }, { "epoch": 0.7773121738895348, "grad_norm": 24.375045776367188, "learning_rate": 4.496937882764655e-06, "loss": 1.5522, "step": 1797 }, { "epoch": 0.7777447349211928, "grad_norm": 25.525135040283203, "learning_rate": 4.488188976377953e-06, "loss": 1.6045, "step": 1798 }, { "epoch": 0.7781772959528509, "grad_norm": 28.482580184936523, "learning_rate": 4.4794400699912516e-06, "loss": 1.5458, "step": 1799 }, { "epoch": 0.7786098569845089, "grad_norm": 25.094606399536133, "learning_rate": 4.47069116360455e-06, "loss": 1.4793, "step": 1800 }, { "epoch": 0.7790424180161669, "grad_norm": 26.066743850708008, "learning_rate": 4.461942257217848e-06, "loss": 1.4703, "step": 1801 }, { "epoch": 0.779474979047825, "grad_norm": 25.871166229248047, "learning_rate": 4.453193350831147e-06, "loss": 1.5241, "step": 1802 }, { "epoch": 0.779907540079483, "grad_norm": 24.948637008666992, "learning_rate": 4.444444444444444e-06, "loss": 1.517, "step": 1803 }, { "epoch": 0.7803401011111412, "grad_norm": 25.19872283935547, "learning_rate": 4.435695538057743e-06, "loss": 1.552, "step": 1804 }, { "epoch": 0.7807726621427992, "grad_norm": 24.39336395263672, "learning_rate": 4.426946631671042e-06, "loss": 1.3931, "step": 1805 }, { "epoch": 0.7812052231744573, "grad_norm": 24.73307228088379, "learning_rate": 4.41819772528434e-06, "loss": 1.5159, "step": 1806 }, { "epoch": 0.7816377842061153, "grad_norm": 24.064722061157227, "learning_rate": 4.4094488188976384e-06, "loss": 1.5075, "step": 1807 }, { "epoch": 0.7820703452377734, "grad_norm": 24.808164596557617, "learning_rate": 4.400699912510937e-06, "loss": 1.4825, "step": 1808 }, { "epoch": 0.7825029062694314, "grad_norm": 27.774396896362305, "learning_rate": 4.391951006124235e-06, "loss": 1.5035, "step": 1809 }, { "epoch": 0.7829354673010895, "grad_norm": 27.53242301940918, "learning_rate": 4.383202099737533e-06, "loss": 1.5277, "step": 1810 }, { "epoch": 0.7833680283327475, "grad_norm": 24.68132781982422, "learning_rate": 4.374453193350832e-06, "loss": 1.4304, "step": 1811 }, { "epoch": 0.7838005893644057, "grad_norm": 26.517210006713867, "learning_rate": 4.365704286964129e-06, "loss": 1.5441, "step": 1812 }, { "epoch": 0.7842331503960637, "grad_norm": 25.836271286010742, "learning_rate": 4.356955380577429e-06, "loss": 1.46, "step": 1813 }, { "epoch": 0.7846657114277218, "grad_norm": 22.400842666625977, "learning_rate": 4.348206474190726e-06, "loss": 1.5226, "step": 1814 }, { "epoch": 0.7850982724593798, "grad_norm": 23.194393157958984, "learning_rate": 4.3394575678040245e-06, "loss": 1.5745, "step": 1815 }, { "epoch": 0.7855308334910379, "grad_norm": 23.99626922607422, "learning_rate": 4.330708661417324e-06, "loss": 1.4476, "step": 1816 }, { "epoch": 0.7859633945226959, "grad_norm": 25.947378158569336, "learning_rate": 4.321959755030621e-06, "loss": 1.5264, "step": 1817 }, { "epoch": 0.786395955554354, "grad_norm": 26.823701858520508, "learning_rate": 4.31321084864392e-06, "loss": 1.6003, "step": 1818 }, { "epoch": 0.786828516586012, "grad_norm": 26.935449600219727, "learning_rate": 4.304461942257218e-06, "loss": 1.5052, "step": 1819 }, { "epoch": 0.7872610776176702, "grad_norm": 24.033824920654297, "learning_rate": 4.295713035870517e-06, "loss": 1.5205, "step": 1820 }, { "epoch": 0.7876936386493282, "grad_norm": 21.987449645996094, "learning_rate": 4.286964129483815e-06, "loss": 1.461, "step": 1821 }, { "epoch": 0.7881261996809863, "grad_norm": 28.15187644958496, "learning_rate": 4.278215223097113e-06, "loss": 1.4807, "step": 1822 }, { "epoch": 0.7885587607126443, "grad_norm": 23.641508102416992, "learning_rate": 4.269466316710411e-06, "loss": 1.5484, "step": 1823 }, { "epoch": 0.7889913217443023, "grad_norm": 24.58562469482422, "learning_rate": 4.26071741032371e-06, "loss": 1.4434, "step": 1824 }, { "epoch": 0.7894238827759604, "grad_norm": 23.53512954711914, "learning_rate": 4.251968503937008e-06, "loss": 1.5811, "step": 1825 }, { "epoch": 0.7898564438076184, "grad_norm": 23.88713264465332, "learning_rate": 4.2432195975503064e-06, "loss": 1.5309, "step": 1826 }, { "epoch": 0.7902890048392766, "grad_norm": 25.259674072265625, "learning_rate": 4.234470691163605e-06, "loss": 1.6166, "step": 1827 }, { "epoch": 0.7907215658709346, "grad_norm": 22.053977966308594, "learning_rate": 4.225721784776903e-06, "loss": 1.5886, "step": 1828 }, { "epoch": 0.7911541269025927, "grad_norm": 23.540071487426758, "learning_rate": 4.2169728783902015e-06, "loss": 1.5388, "step": 1829 }, { "epoch": 0.7915866879342507, "grad_norm": 23.088363647460938, "learning_rate": 4.2082239720035e-06, "loss": 1.5588, "step": 1830 }, { "epoch": 0.7920192489659088, "grad_norm": 24.821250915527344, "learning_rate": 4.199475065616798e-06, "loss": 1.5518, "step": 1831 }, { "epoch": 0.7924518099975668, "grad_norm": 25.494277954101562, "learning_rate": 4.190726159230097e-06, "loss": 1.5499, "step": 1832 }, { "epoch": 0.7928843710292249, "grad_norm": 25.74853515625, "learning_rate": 4.181977252843395e-06, "loss": 1.5049, "step": 1833 }, { "epoch": 0.7933169320608829, "grad_norm": 24.494779586791992, "learning_rate": 4.173228346456693e-06, "loss": 1.4597, "step": 1834 }, { "epoch": 0.7937494930925411, "grad_norm": 23.52466583251953, "learning_rate": 4.164479440069992e-06, "loss": 1.4917, "step": 1835 }, { "epoch": 0.7941820541241991, "grad_norm": 24.15924072265625, "learning_rate": 4.15573053368329e-06, "loss": 1.5182, "step": 1836 }, { "epoch": 0.7946146151558572, "grad_norm": 24.49439239501953, "learning_rate": 4.146981627296588e-06, "loss": 1.5663, "step": 1837 }, { "epoch": 0.7950471761875152, "grad_norm": 25.374263763427734, "learning_rate": 4.138232720909887e-06, "loss": 1.4795, "step": 1838 }, { "epoch": 0.7954797372191733, "grad_norm": 25.588665008544922, "learning_rate": 4.129483814523185e-06, "loss": 1.531, "step": 1839 }, { "epoch": 0.7959122982508313, "grad_norm": 26.306419372558594, "learning_rate": 4.1207349081364835e-06, "loss": 1.5243, "step": 1840 }, { "epoch": 0.7963448592824894, "grad_norm": 24.638086318969727, "learning_rate": 4.111986001749782e-06, "loss": 1.4911, "step": 1841 }, { "epoch": 0.7967774203141474, "grad_norm": 25.27682113647461, "learning_rate": 4.10323709536308e-06, "loss": 1.4854, "step": 1842 }, { "epoch": 0.7972099813458056, "grad_norm": 28.576251983642578, "learning_rate": 4.0944881889763785e-06, "loss": 1.4369, "step": 1843 }, { "epoch": 0.7976425423774636, "grad_norm": 26.261287689208984, "learning_rate": 4.085739282589677e-06, "loss": 1.5401, "step": 1844 }, { "epoch": 0.7980751034091216, "grad_norm": 24.996610641479492, "learning_rate": 4.076990376202975e-06, "loss": 1.5306, "step": 1845 }, { "epoch": 0.7985076644407797, "grad_norm": 23.960554122924805, "learning_rate": 4.068241469816273e-06, "loss": 1.5065, "step": 1846 }, { "epoch": 0.7989402254724377, "grad_norm": 31.743310928344727, "learning_rate": 4.059492563429572e-06, "loss": 1.59, "step": 1847 }, { "epoch": 0.7993727865040958, "grad_norm": 23.445873260498047, "learning_rate": 4.0507436570428695e-06, "loss": 1.5049, "step": 1848 }, { "epoch": 0.7998053475357538, "grad_norm": 24.140777587890625, "learning_rate": 4.041994750656169e-06, "loss": 1.5721, "step": 1849 }, { "epoch": 0.800237908567412, "grad_norm": 23.796100616455078, "learning_rate": 4.033245844269466e-06, "loss": 1.5244, "step": 1850 }, { "epoch": 0.80067046959907, "grad_norm": 29.071531295776367, "learning_rate": 4.024496937882765e-06, "loss": 1.5071, "step": 1851 }, { "epoch": 0.8011030306307281, "grad_norm": 24.83522605895996, "learning_rate": 4.015748031496064e-06, "loss": 1.5093, "step": 1852 }, { "epoch": 0.8015355916623861, "grad_norm": 25.007980346679688, "learning_rate": 4.006999125109361e-06, "loss": 1.51, "step": 1853 }, { "epoch": 0.8019681526940442, "grad_norm": 23.728660583496094, "learning_rate": 3.9982502187226605e-06, "loss": 1.475, "step": 1854 }, { "epoch": 0.8024007137257022, "grad_norm": 26.348461151123047, "learning_rate": 3.989501312335958e-06, "loss": 1.5531, "step": 1855 }, { "epoch": 0.8028332747573603, "grad_norm": 25.381500244140625, "learning_rate": 3.980752405949257e-06, "loss": 1.4599, "step": 1856 }, { "epoch": 0.8032658357890183, "grad_norm": 24.115501403808594, "learning_rate": 3.972003499562555e-06, "loss": 1.5345, "step": 1857 }, { "epoch": 0.8036983968206765, "grad_norm": 24.597143173217773, "learning_rate": 3.963254593175853e-06, "loss": 1.4688, "step": 1858 }, { "epoch": 0.8041309578523345, "grad_norm": 24.78520965576172, "learning_rate": 3.9545056867891515e-06, "loss": 1.4823, "step": 1859 }, { "epoch": 0.8045635188839926, "grad_norm": 23.59719467163086, "learning_rate": 3.94575678040245e-06, "loss": 1.4657, "step": 1860 }, { "epoch": 0.8049960799156506, "grad_norm": 26.260265350341797, "learning_rate": 3.937007874015748e-06, "loss": 1.5238, "step": 1861 }, { "epoch": 0.8054286409473087, "grad_norm": 26.81787872314453, "learning_rate": 3.9282589676290465e-06, "loss": 1.5246, "step": 1862 }, { "epoch": 0.8058612019789667, "grad_norm": 24.150634765625, "learning_rate": 3.919510061242345e-06, "loss": 1.4304, "step": 1863 }, { "epoch": 0.8062937630106248, "grad_norm": 25.50913429260254, "learning_rate": 3.910761154855643e-06, "loss": 1.482, "step": 1864 }, { "epoch": 0.8067263240422828, "grad_norm": 27.578847885131836, "learning_rate": 3.902012248468942e-06, "loss": 1.4806, "step": 1865 }, { "epoch": 0.8071588850739408, "grad_norm": 28.148900985717773, "learning_rate": 3.89326334208224e-06, "loss": 1.5452, "step": 1866 }, { "epoch": 0.807591446105599, "grad_norm": 25.29563331604004, "learning_rate": 3.884514435695538e-06, "loss": 1.4855, "step": 1867 }, { "epoch": 0.808024007137257, "grad_norm": 26.5310001373291, "learning_rate": 3.875765529308837e-06, "loss": 1.4845, "step": 1868 }, { "epoch": 0.8084565681689151, "grad_norm": 24.8538818359375, "learning_rate": 3.867016622922135e-06, "loss": 1.5439, "step": 1869 }, { "epoch": 0.8088891292005731, "grad_norm": 24.38702964782715, "learning_rate": 3.858267716535433e-06, "loss": 1.524, "step": 1870 }, { "epoch": 0.8093216902322312, "grad_norm": 25.538375854492188, "learning_rate": 3.849518810148732e-06, "loss": 1.5586, "step": 1871 }, { "epoch": 0.8097542512638892, "grad_norm": 24.558469772338867, "learning_rate": 3.84076990376203e-06, "loss": 1.5687, "step": 1872 }, { "epoch": 0.8101868122955473, "grad_norm": 24.07853126525879, "learning_rate": 3.8320209973753285e-06, "loss": 1.5702, "step": 1873 }, { "epoch": 0.8106193733272054, "grad_norm": 23.100337982177734, "learning_rate": 3.823272090988627e-06, "loss": 1.5732, "step": 1874 }, { "epoch": 0.8110519343588635, "grad_norm": 24.077980041503906, "learning_rate": 3.814523184601925e-06, "loss": 1.4838, "step": 1875 }, { "epoch": 0.8114844953905215, "grad_norm": 24.32474708557129, "learning_rate": 3.8057742782152236e-06, "loss": 1.5323, "step": 1876 }, { "epoch": 0.8119170564221796, "grad_norm": 26.63454246520996, "learning_rate": 3.7970253718285215e-06, "loss": 1.581, "step": 1877 }, { "epoch": 0.8123496174538376, "grad_norm": 26.129480361938477, "learning_rate": 3.7882764654418203e-06, "loss": 1.5964, "step": 1878 }, { "epoch": 0.8127821784854957, "grad_norm": 25.31804847717285, "learning_rate": 3.7795275590551182e-06, "loss": 1.5183, "step": 1879 }, { "epoch": 0.8132147395171537, "grad_norm": 25.525976181030273, "learning_rate": 3.770778652668417e-06, "loss": 1.5983, "step": 1880 }, { "epoch": 0.8136473005488118, "grad_norm": 23.734786987304688, "learning_rate": 3.762029746281715e-06, "loss": 1.555, "step": 1881 }, { "epoch": 0.8140798615804699, "grad_norm": 23.824871063232422, "learning_rate": 3.7532808398950133e-06, "loss": 1.5189, "step": 1882 }, { "epoch": 0.814512422612128, "grad_norm": 26.2841854095459, "learning_rate": 3.744531933508312e-06, "loss": 1.543, "step": 1883 }, { "epoch": 0.814944983643786, "grad_norm": 27.476755142211914, "learning_rate": 3.73578302712161e-06, "loss": 1.5171, "step": 1884 }, { "epoch": 0.8153775446754441, "grad_norm": 24.633068084716797, "learning_rate": 3.727034120734909e-06, "loss": 1.5069, "step": 1885 }, { "epoch": 0.8158101057071021, "grad_norm": 26.17974281311035, "learning_rate": 3.7182852143482068e-06, "loss": 1.5122, "step": 1886 }, { "epoch": 0.8162426667387602, "grad_norm": 24.839109420776367, "learning_rate": 3.7095363079615047e-06, "loss": 1.5153, "step": 1887 }, { "epoch": 0.8166752277704182, "grad_norm": 21.22071647644043, "learning_rate": 3.7007874015748035e-06, "loss": 1.5247, "step": 1888 }, { "epoch": 0.8171077888020762, "grad_norm": 25.769725799560547, "learning_rate": 3.692038495188102e-06, "loss": 1.4744, "step": 1889 }, { "epoch": 0.8175403498337344, "grad_norm": 24.709821701049805, "learning_rate": 3.6832895888014e-06, "loss": 1.5333, "step": 1890 }, { "epoch": 0.8179729108653924, "grad_norm": 25.408071517944336, "learning_rate": 3.6745406824146986e-06, "loss": 1.5651, "step": 1891 }, { "epoch": 0.8184054718970505, "grad_norm": 24.688087463378906, "learning_rate": 3.665791776027997e-06, "loss": 1.4822, "step": 1892 }, { "epoch": 0.8188380329287085, "grad_norm": 24.41063117980957, "learning_rate": 3.6570428696412953e-06, "loss": 1.5928, "step": 1893 }, { "epoch": 0.8192705939603666, "grad_norm": 25.023773193359375, "learning_rate": 3.6482939632545932e-06, "loss": 1.5182, "step": 1894 }, { "epoch": 0.8197031549920246, "grad_norm": 26.05735969543457, "learning_rate": 3.639545056867892e-06, "loss": 1.5007, "step": 1895 }, { "epoch": 0.8201357160236827, "grad_norm": 27.816280364990234, "learning_rate": 3.63079615048119e-06, "loss": 1.4621, "step": 1896 }, { "epoch": 0.8205682770553407, "grad_norm": 27.703954696655273, "learning_rate": 3.6220472440944887e-06, "loss": 1.4638, "step": 1897 }, { "epoch": 0.8210008380869989, "grad_norm": 26.362079620361328, "learning_rate": 3.6132983377077867e-06, "loss": 1.5282, "step": 1898 }, { "epoch": 0.8214333991186569, "grad_norm": 25.99664306640625, "learning_rate": 3.604549431321085e-06, "loss": 1.4904, "step": 1899 }, { "epoch": 0.821865960150315, "grad_norm": 24.36591911315918, "learning_rate": 3.595800524934384e-06, "loss": 1.555, "step": 1900 }, { "epoch": 0.822298521181973, "grad_norm": 25.207164764404297, "learning_rate": 3.5870516185476817e-06, "loss": 1.5043, "step": 1901 }, { "epoch": 0.8227310822136311, "grad_norm": 27.927532196044922, "learning_rate": 3.5783027121609805e-06, "loss": 1.5988, "step": 1902 }, { "epoch": 0.8231636432452891, "grad_norm": 23.66588020324707, "learning_rate": 3.5695538057742785e-06, "loss": 1.5167, "step": 1903 }, { "epoch": 0.8235962042769472, "grad_norm": 25.874834060668945, "learning_rate": 3.5608048993875772e-06, "loss": 1.4365, "step": 1904 }, { "epoch": 0.8240287653086052, "grad_norm": 23.785438537597656, "learning_rate": 3.552055993000875e-06, "loss": 1.5468, "step": 1905 }, { "epoch": 0.8244613263402634, "grad_norm": 27.696264266967773, "learning_rate": 3.5433070866141735e-06, "loss": 1.5845, "step": 1906 }, { "epoch": 0.8248938873719214, "grad_norm": 24.913143157958984, "learning_rate": 3.534558180227472e-06, "loss": 1.6031, "step": 1907 }, { "epoch": 0.8253264484035795, "grad_norm": 25.623226165771484, "learning_rate": 3.5258092738407703e-06, "loss": 1.4782, "step": 1908 }, { "epoch": 0.8257590094352375, "grad_norm": 26.468833923339844, "learning_rate": 3.5170603674540686e-06, "loss": 1.5557, "step": 1909 }, { "epoch": 0.8261915704668955, "grad_norm": 25.889429092407227, "learning_rate": 3.508311461067367e-06, "loss": 1.4884, "step": 1910 }, { "epoch": 0.8266241314985536, "grad_norm": 23.6556453704834, "learning_rate": 3.499562554680665e-06, "loss": 1.5173, "step": 1911 }, { "epoch": 0.8270566925302116, "grad_norm": 26.38465690612793, "learning_rate": 3.4908136482939637e-06, "loss": 1.4916, "step": 1912 }, { "epoch": 0.8274892535618698, "grad_norm": 22.687150955200195, "learning_rate": 3.4820647419072616e-06, "loss": 1.4722, "step": 1913 }, { "epoch": 0.8279218145935278, "grad_norm": 23.410667419433594, "learning_rate": 3.4733158355205604e-06, "loss": 1.4913, "step": 1914 }, { "epoch": 0.8283543756251859, "grad_norm": 28.485891342163086, "learning_rate": 3.4645669291338583e-06, "loss": 1.4458, "step": 1915 }, { "epoch": 0.8287869366568439, "grad_norm": 25.982906341552734, "learning_rate": 3.455818022747157e-06, "loss": 1.4411, "step": 1916 }, { "epoch": 0.829219497688502, "grad_norm": 25.231077194213867, "learning_rate": 3.447069116360455e-06, "loss": 1.4979, "step": 1917 }, { "epoch": 0.82965205872016, "grad_norm": 23.16214370727539, "learning_rate": 3.4383202099737534e-06, "loss": 1.4828, "step": 1918 }, { "epoch": 0.8300846197518181, "grad_norm": 26.24925994873047, "learning_rate": 3.429571303587052e-06, "loss": 1.5102, "step": 1919 }, { "epoch": 0.8305171807834761, "grad_norm": 21.732011795043945, "learning_rate": 3.42082239720035e-06, "loss": 1.5264, "step": 1920 }, { "epoch": 0.8309497418151343, "grad_norm": 26.923465728759766, "learning_rate": 3.412073490813649e-06, "loss": 1.4386, "step": 1921 }, { "epoch": 0.8313823028467923, "grad_norm": 26.28173828125, "learning_rate": 3.403324584426947e-06, "loss": 1.5147, "step": 1922 }, { "epoch": 0.8318148638784504, "grad_norm": 27.070018768310547, "learning_rate": 3.394575678040245e-06, "loss": 1.4853, "step": 1923 }, { "epoch": 0.8322474249101084, "grad_norm": 23.679351806640625, "learning_rate": 3.3858267716535436e-06, "loss": 1.5418, "step": 1924 }, { "epoch": 0.8326799859417665, "grad_norm": 27.61638832092285, "learning_rate": 3.377077865266842e-06, "loss": 1.5201, "step": 1925 }, { "epoch": 0.8331125469734245, "grad_norm": 26.128068923950195, "learning_rate": 3.3683289588801403e-06, "loss": 1.5359, "step": 1926 }, { "epoch": 0.8335451080050826, "grad_norm": 24.252161026000977, "learning_rate": 3.3595800524934387e-06, "loss": 1.4833, "step": 1927 }, { "epoch": 0.8339776690367406, "grad_norm": 26.1506404876709, "learning_rate": 3.350831146106737e-06, "loss": 1.5333, "step": 1928 }, { "epoch": 0.8344102300683988, "grad_norm": 25.713539123535156, "learning_rate": 3.3420822397200354e-06, "loss": 1.5724, "step": 1929 }, { "epoch": 0.8348427911000568, "grad_norm": 24.772716522216797, "learning_rate": 3.3333333333333333e-06, "loss": 1.5876, "step": 1930 }, { "epoch": 0.8352753521317149, "grad_norm": 26.96317481994629, "learning_rate": 3.324584426946632e-06, "loss": 1.6016, "step": 1931 }, { "epoch": 0.8357079131633729, "grad_norm": 25.352603912353516, "learning_rate": 3.31583552055993e-06, "loss": 1.4939, "step": 1932 }, { "epoch": 0.8361404741950309, "grad_norm": 24.081235885620117, "learning_rate": 3.307086614173229e-06, "loss": 1.4755, "step": 1933 }, { "epoch": 0.836573035226689, "grad_norm": 26.306793212890625, "learning_rate": 3.2983377077865268e-06, "loss": 1.5863, "step": 1934 }, { "epoch": 0.837005596258347, "grad_norm": 27.727188110351562, "learning_rate": 3.289588801399825e-06, "loss": 1.5823, "step": 1935 }, { "epoch": 0.8374381572900051, "grad_norm": 25.65025520324707, "learning_rate": 3.280839895013124e-06, "loss": 1.4594, "step": 1936 }, { "epoch": 0.8378707183216632, "grad_norm": 23.665359497070312, "learning_rate": 3.272090988626422e-06, "loss": 1.6313, "step": 1937 }, { "epoch": 0.8383032793533213, "grad_norm": 23.80894660949707, "learning_rate": 3.2633420822397206e-06, "loss": 1.5172, "step": 1938 }, { "epoch": 0.8387358403849793, "grad_norm": 26.22355842590332, "learning_rate": 3.2545931758530186e-06, "loss": 1.564, "step": 1939 }, { "epoch": 0.8391684014166374, "grad_norm": 24.528112411499023, "learning_rate": 3.2458442694663165e-06, "loss": 1.519, "step": 1940 }, { "epoch": 0.8396009624482954, "grad_norm": 26.274965286254883, "learning_rate": 3.2370953630796153e-06, "loss": 1.5012, "step": 1941 }, { "epoch": 0.8400335234799535, "grad_norm": 26.261157989501953, "learning_rate": 3.2283464566929136e-06, "loss": 1.481, "step": 1942 }, { "epoch": 0.8404660845116115, "grad_norm": 27.06733512878418, "learning_rate": 3.219597550306212e-06, "loss": 1.5262, "step": 1943 }, { "epoch": 0.8408986455432697, "grad_norm": 23.958738327026367, "learning_rate": 3.2108486439195104e-06, "loss": 1.5564, "step": 1944 }, { "epoch": 0.8413312065749277, "grad_norm": 24.236982345581055, "learning_rate": 3.2020997375328087e-06, "loss": 1.5062, "step": 1945 }, { "epoch": 0.8417637676065858, "grad_norm": 24.281795501708984, "learning_rate": 3.193350831146107e-06, "loss": 1.5618, "step": 1946 }, { "epoch": 0.8421963286382438, "grad_norm": 23.478912353515625, "learning_rate": 3.184601924759405e-06, "loss": 1.5643, "step": 1947 }, { "epoch": 0.8426288896699019, "grad_norm": 24.562108993530273, "learning_rate": 3.175853018372704e-06, "loss": 1.4998, "step": 1948 }, { "epoch": 0.8430614507015599, "grad_norm": 24.686317443847656, "learning_rate": 3.1671041119860017e-06, "loss": 1.4872, "step": 1949 }, { "epoch": 0.843494011733218, "grad_norm": 25.728281021118164, "learning_rate": 3.1583552055993005e-06, "loss": 1.505, "step": 1950 }, { "epoch": 0.843926572764876, "grad_norm": 26.217914581298828, "learning_rate": 3.1496062992125985e-06, "loss": 1.4911, "step": 1951 }, { "epoch": 0.8443591337965342, "grad_norm": 25.863449096679688, "learning_rate": 3.140857392825897e-06, "loss": 1.6666, "step": 1952 }, { "epoch": 0.8447916948281922, "grad_norm": 25.692018508911133, "learning_rate": 3.1321084864391956e-06, "loss": 1.5049, "step": 1953 }, { "epoch": 0.8452242558598502, "grad_norm": 25.147676467895508, "learning_rate": 3.1233595800524935e-06, "loss": 1.5504, "step": 1954 }, { "epoch": 0.8456568168915083, "grad_norm": 26.268417358398438, "learning_rate": 3.1146106736657923e-06, "loss": 1.4939, "step": 1955 }, { "epoch": 0.8460893779231663, "grad_norm": 22.62088394165039, "learning_rate": 3.1058617672790903e-06, "loss": 1.5196, "step": 1956 }, { "epoch": 0.8465219389548244, "grad_norm": 23.995553970336914, "learning_rate": 3.097112860892389e-06, "loss": 1.4776, "step": 1957 }, { "epoch": 0.8469544999864824, "grad_norm": 24.5943546295166, "learning_rate": 3.088363954505687e-06, "loss": 1.5271, "step": 1958 }, { "epoch": 0.8473870610181405, "grad_norm": 24.14809799194336, "learning_rate": 3.0796150481189853e-06, "loss": 1.5425, "step": 1959 }, { "epoch": 0.8478196220497985, "grad_norm": 22.18802833557129, "learning_rate": 3.0708661417322837e-06, "loss": 1.5071, "step": 1960 }, { "epoch": 0.8482521830814567, "grad_norm": 23.465879440307617, "learning_rate": 3.062117235345582e-06, "loss": 1.4322, "step": 1961 }, { "epoch": 0.8486847441131147, "grad_norm": 25.830198287963867, "learning_rate": 3.0533683289588804e-06, "loss": 1.5651, "step": 1962 }, { "epoch": 0.8491173051447728, "grad_norm": 28.602500915527344, "learning_rate": 3.0446194225721788e-06, "loss": 1.3828, "step": 1963 }, { "epoch": 0.8495498661764308, "grad_norm": 24.01179313659668, "learning_rate": 3.0358705161854767e-06, "loss": 1.5234, "step": 1964 }, { "epoch": 0.8499824272080889, "grad_norm": 26.218347549438477, "learning_rate": 3.0271216097987755e-06, "loss": 1.4781, "step": 1965 }, { "epoch": 0.8504149882397469, "grad_norm": 26.33125877380371, "learning_rate": 3.0183727034120734e-06, "loss": 1.4859, "step": 1966 }, { "epoch": 0.850847549271405, "grad_norm": 25.678081512451172, "learning_rate": 3.0096237970253722e-06, "loss": 1.4866, "step": 1967 }, { "epoch": 0.851280110303063, "grad_norm": 29.468505859375, "learning_rate": 3.00087489063867e-06, "loss": 1.5086, "step": 1968 }, { "epoch": 0.8517126713347212, "grad_norm": 23.861677169799805, "learning_rate": 2.992125984251969e-06, "loss": 1.5575, "step": 1969 }, { "epoch": 0.8521452323663792, "grad_norm": 25.250822067260742, "learning_rate": 2.9833770778652673e-06, "loss": 1.4855, "step": 1970 }, { "epoch": 0.8525777933980373, "grad_norm": 26.805891036987305, "learning_rate": 2.9746281714785652e-06, "loss": 1.4851, "step": 1971 }, { "epoch": 0.8530103544296953, "grad_norm": 23.66602897644043, "learning_rate": 2.965879265091864e-06, "loss": 1.5238, "step": 1972 }, { "epoch": 0.8534429154613534, "grad_norm": 25.112504959106445, "learning_rate": 2.957130358705162e-06, "loss": 1.4722, "step": 1973 }, { "epoch": 0.8538754764930114, "grad_norm": 24.553518295288086, "learning_rate": 2.9483814523184607e-06, "loss": 1.5377, "step": 1974 }, { "epoch": 0.8543080375246696, "grad_norm": 27.337066650390625, "learning_rate": 2.9396325459317587e-06, "loss": 1.5673, "step": 1975 }, { "epoch": 0.8547405985563276, "grad_norm": 28.423484802246094, "learning_rate": 2.930883639545057e-06, "loss": 1.5152, "step": 1976 }, { "epoch": 0.8551731595879856, "grad_norm": 25.569091796875, "learning_rate": 2.9221347331583554e-06, "loss": 1.409, "step": 1977 }, { "epoch": 0.8556057206196437, "grad_norm": 22.94341468811035, "learning_rate": 2.9133858267716538e-06, "loss": 1.4882, "step": 1978 }, { "epoch": 0.8560382816513017, "grad_norm": 23.691856384277344, "learning_rate": 2.904636920384952e-06, "loss": 1.4557, "step": 1979 }, { "epoch": 0.8564708426829598, "grad_norm": 27.209903717041016, "learning_rate": 2.8958880139982505e-06, "loss": 1.51, "step": 1980 }, { "epoch": 0.8569034037146178, "grad_norm": 26.904014587402344, "learning_rate": 2.887139107611549e-06, "loss": 1.6156, "step": 1981 }, { "epoch": 0.8573359647462759, "grad_norm": 24.399385452270508, "learning_rate": 2.878390201224847e-06, "loss": 1.5173, "step": 1982 }, { "epoch": 0.8577685257779339, "grad_norm": 23.10108184814453, "learning_rate": 2.869641294838145e-06, "loss": 1.4796, "step": 1983 }, { "epoch": 0.8582010868095921, "grad_norm": 27.3771915435791, "learning_rate": 2.860892388451444e-06, "loss": 1.5105, "step": 1984 }, { "epoch": 0.8586336478412501, "grad_norm": 24.214082717895508, "learning_rate": 2.852143482064742e-06, "loss": 1.5859, "step": 1985 }, { "epoch": 0.8590662088729082, "grad_norm": 25.662439346313477, "learning_rate": 2.8433945756780406e-06, "loss": 1.4917, "step": 1986 }, { "epoch": 0.8594987699045662, "grad_norm": 26.082138061523438, "learning_rate": 2.8346456692913386e-06, "loss": 1.5926, "step": 1987 }, { "epoch": 0.8599313309362243, "grad_norm": 25.768564224243164, "learning_rate": 2.825896762904637e-06, "loss": 1.5071, "step": 1988 }, { "epoch": 0.8603638919678823, "grad_norm": 26.158370971679688, "learning_rate": 2.8171478565179357e-06, "loss": 1.4536, "step": 1989 }, { "epoch": 0.8607964529995404, "grad_norm": 25.917757034301758, "learning_rate": 2.8083989501312337e-06, "loss": 1.4675, "step": 1990 }, { "epoch": 0.8612290140311984, "grad_norm": 24.782541275024414, "learning_rate": 2.7996500437445324e-06, "loss": 1.4918, "step": 1991 }, { "epoch": 0.8616615750628566, "grad_norm": 24.238004684448242, "learning_rate": 2.7909011373578304e-06, "loss": 1.55, "step": 1992 }, { "epoch": 0.8620941360945146, "grad_norm": 28.761098861694336, "learning_rate": 2.782152230971129e-06, "loss": 1.5032, "step": 1993 }, { "epoch": 0.8625266971261727, "grad_norm": 28.440311431884766, "learning_rate": 2.773403324584427e-06, "loss": 1.4544, "step": 1994 }, { "epoch": 0.8629592581578307, "grad_norm": 24.347803115844727, "learning_rate": 2.7646544181977255e-06, "loss": 1.4856, "step": 1995 }, { "epoch": 0.8633918191894888, "grad_norm": 23.017044067382812, "learning_rate": 2.755905511811024e-06, "loss": 1.513, "step": 1996 }, { "epoch": 0.8638243802211468, "grad_norm": 24.277076721191406, "learning_rate": 2.747156605424322e-06, "loss": 1.5532, "step": 1997 }, { "epoch": 0.8642569412528048, "grad_norm": 26.928024291992188, "learning_rate": 2.7384076990376205e-06, "loss": 1.5431, "step": 1998 }, { "epoch": 0.864689502284463, "grad_norm": 25.287187576293945, "learning_rate": 2.729658792650919e-06, "loss": 1.5137, "step": 1999 }, { "epoch": 0.865122063316121, "grad_norm": 23.46804428100586, "learning_rate": 2.720909886264217e-06, "loss": 1.5105, "step": 2000 }, { "epoch": 0.8655546243477791, "grad_norm": 26.317270278930664, "learning_rate": 2.7121609798775156e-06, "loss": 1.4855, "step": 2001 }, { "epoch": 0.8659871853794371, "grad_norm": 23.94223403930664, "learning_rate": 2.7034120734908135e-06, "loss": 1.5016, "step": 2002 }, { "epoch": 0.8664197464110952, "grad_norm": 25.011159896850586, "learning_rate": 2.6946631671041123e-06, "loss": 1.5259, "step": 2003 }, { "epoch": 0.8668523074427532, "grad_norm": 26.059602737426758, "learning_rate": 2.6859142607174103e-06, "loss": 1.4911, "step": 2004 }, { "epoch": 0.8672848684744113, "grad_norm": 25.560832977294922, "learning_rate": 2.677165354330709e-06, "loss": 1.5273, "step": 2005 }, { "epoch": 0.8677174295060693, "grad_norm": 24.12883186340332, "learning_rate": 2.6684164479440074e-06, "loss": 1.5653, "step": 2006 }, { "epoch": 0.8681499905377275, "grad_norm": 24.72055435180664, "learning_rate": 2.6596675415573053e-06, "loss": 1.532, "step": 2007 }, { "epoch": 0.8685825515693855, "grad_norm": 23.121793746948242, "learning_rate": 2.650918635170604e-06, "loss": 1.532, "step": 2008 }, { "epoch": 0.8690151126010436, "grad_norm": 27.0283145904541, "learning_rate": 2.642169728783902e-06, "loss": 1.4829, "step": 2009 }, { "epoch": 0.8694476736327016, "grad_norm": 25.010900497436523, "learning_rate": 2.633420822397201e-06, "loss": 1.5846, "step": 2010 }, { "epoch": 0.8698802346643597, "grad_norm": 26.372800827026367, "learning_rate": 2.6246719160104988e-06, "loss": 1.5037, "step": 2011 }, { "epoch": 0.8703127956960177, "grad_norm": 24.331981658935547, "learning_rate": 2.615923009623797e-06, "loss": 1.431, "step": 2012 }, { "epoch": 0.8707453567276758, "grad_norm": 24.65950584411621, "learning_rate": 2.6071741032370955e-06, "loss": 1.4707, "step": 2013 }, { "epoch": 0.8711779177593338, "grad_norm": 25.584592819213867, "learning_rate": 2.598425196850394e-06, "loss": 1.4639, "step": 2014 }, { "epoch": 0.871610478790992, "grad_norm": 24.871679306030273, "learning_rate": 2.5896762904636922e-06, "loss": 1.4434, "step": 2015 }, { "epoch": 0.87204303982265, "grad_norm": 28.186784744262695, "learning_rate": 2.5809273840769906e-06, "loss": 1.5004, "step": 2016 }, { "epoch": 0.8724756008543081, "grad_norm": 23.437673568725586, "learning_rate": 2.5721784776902894e-06, "loss": 1.5752, "step": 2017 }, { "epoch": 0.8729081618859661, "grad_norm": 24.34725570678711, "learning_rate": 2.5634295713035873e-06, "loss": 1.5573, "step": 2018 }, { "epoch": 0.8733407229176242, "grad_norm": 26.968994140625, "learning_rate": 2.5546806649168852e-06, "loss": 1.466, "step": 2019 }, { "epoch": 0.8737732839492822, "grad_norm": 23.812091827392578, "learning_rate": 2.545931758530184e-06, "loss": 1.4728, "step": 2020 }, { "epoch": 0.8742058449809402, "grad_norm": 26.316484451293945, "learning_rate": 2.537182852143482e-06, "loss": 1.495, "step": 2021 }, { "epoch": 0.8746384060125983, "grad_norm": 26.36723518371582, "learning_rate": 2.5284339457567807e-06, "loss": 1.4384, "step": 2022 }, { "epoch": 0.8750709670442564, "grad_norm": 27.980514526367188, "learning_rate": 2.519685039370079e-06, "loss": 1.5268, "step": 2023 }, { "epoch": 0.8755035280759145, "grad_norm": 25.934511184692383, "learning_rate": 2.510936132983377e-06, "loss": 1.5593, "step": 2024 }, { "epoch": 0.8759360891075725, "grad_norm": 24.022119522094727, "learning_rate": 2.502187226596676e-06, "loss": 1.5023, "step": 2025 }, { "epoch": 0.8763686501392306, "grad_norm": 24.290563583374023, "learning_rate": 2.493438320209974e-06, "loss": 1.4827, "step": 2026 }, { "epoch": 0.8768012111708886, "grad_norm": 24.21749496459961, "learning_rate": 2.484689413823272e-06, "loss": 1.425, "step": 2027 }, { "epoch": 0.8772337722025467, "grad_norm": 26.60518455505371, "learning_rate": 2.4759405074365705e-06, "loss": 1.5017, "step": 2028 }, { "epoch": 0.8776663332342047, "grad_norm": 25.764251708984375, "learning_rate": 2.467191601049869e-06, "loss": 1.5647, "step": 2029 }, { "epoch": 0.8780988942658629, "grad_norm": 24.295669555664062, "learning_rate": 2.458442694663167e-06, "loss": 1.4745, "step": 2030 }, { "epoch": 0.8785314552975209, "grad_norm": 28.384014129638672, "learning_rate": 2.4496937882764656e-06, "loss": 1.554, "step": 2031 }, { "epoch": 0.878964016329179, "grad_norm": 23.786191940307617, "learning_rate": 2.440944881889764e-06, "loss": 1.5333, "step": 2032 }, { "epoch": 0.879396577360837, "grad_norm": 25.375789642333984, "learning_rate": 2.4321959755030623e-06, "loss": 1.5183, "step": 2033 }, { "epoch": 0.8798291383924951, "grad_norm": 24.33387565612793, "learning_rate": 2.4234470691163606e-06, "loss": 1.4466, "step": 2034 }, { "epoch": 0.8802616994241531, "grad_norm": 23.24261474609375, "learning_rate": 2.414698162729659e-06, "loss": 1.5362, "step": 2035 }, { "epoch": 0.8806942604558112, "grad_norm": 24.91956329345703, "learning_rate": 2.4059492563429574e-06, "loss": 1.4217, "step": 2036 }, { "epoch": 0.8811268214874692, "grad_norm": 24.583356857299805, "learning_rate": 2.3972003499562557e-06, "loss": 1.4938, "step": 2037 }, { "epoch": 0.8815593825191274, "grad_norm": 25.635778427124023, "learning_rate": 2.388451443569554e-06, "loss": 1.5102, "step": 2038 }, { "epoch": 0.8819919435507854, "grad_norm": 24.82320785522461, "learning_rate": 2.379702537182852e-06, "loss": 1.5763, "step": 2039 }, { "epoch": 0.8824245045824435, "grad_norm": 24.23630714416504, "learning_rate": 2.370953630796151e-06, "loss": 1.5198, "step": 2040 }, { "epoch": 0.8828570656141015, "grad_norm": 22.778940200805664, "learning_rate": 2.362204724409449e-06, "loss": 1.5602, "step": 2041 }, { "epoch": 0.8832896266457595, "grad_norm": 23.935001373291016, "learning_rate": 2.3534558180227475e-06, "loss": 1.4802, "step": 2042 }, { "epoch": 0.8837221876774176, "grad_norm": 27.67841148376465, "learning_rate": 2.344706911636046e-06, "loss": 1.5749, "step": 2043 }, { "epoch": 0.8841547487090756, "grad_norm": 25.99941635131836, "learning_rate": 2.3359580052493442e-06, "loss": 1.4838, "step": 2044 }, { "epoch": 0.8845873097407337, "grad_norm": 26.464733123779297, "learning_rate": 2.327209098862642e-06, "loss": 1.4711, "step": 2045 }, { "epoch": 0.8850198707723917, "grad_norm": 24.384105682373047, "learning_rate": 2.3184601924759405e-06, "loss": 1.5368, "step": 2046 }, { "epoch": 0.8854524318040499, "grad_norm": 26.473554611206055, "learning_rate": 2.309711286089239e-06, "loss": 1.5003, "step": 2047 }, { "epoch": 0.8858849928357079, "grad_norm": 23.190519332885742, "learning_rate": 2.3009623797025373e-06, "loss": 1.4441, "step": 2048 }, { "epoch": 0.886317553867366, "grad_norm": 25.446199417114258, "learning_rate": 2.2922134733158356e-06, "loss": 1.5295, "step": 2049 }, { "epoch": 0.886750114899024, "grad_norm": 25.416261672973633, "learning_rate": 2.283464566929134e-06, "loss": 1.4706, "step": 2050 }, { "epoch": 0.8871826759306821, "grad_norm": 23.452497482299805, "learning_rate": 2.2747156605424323e-06, "loss": 1.54, "step": 2051 }, { "epoch": 0.8876152369623401, "grad_norm": 26.014728546142578, "learning_rate": 2.2659667541557307e-06, "loss": 1.531, "step": 2052 }, { "epoch": 0.8880477979939982, "grad_norm": 28.156362533569336, "learning_rate": 2.257217847769029e-06, "loss": 1.6611, "step": 2053 }, { "epoch": 0.8884803590256563, "grad_norm": 28.15045928955078, "learning_rate": 2.2484689413823274e-06, "loss": 1.4967, "step": 2054 }, { "epoch": 0.8889129200573144, "grad_norm": 26.710493087768555, "learning_rate": 2.2397200349956258e-06, "loss": 1.5291, "step": 2055 }, { "epoch": 0.8893454810889724, "grad_norm": 26.1806640625, "learning_rate": 2.230971128608924e-06, "loss": 1.6146, "step": 2056 }, { "epoch": 0.8897780421206305, "grad_norm": 28.079967498779297, "learning_rate": 2.222222222222222e-06, "loss": 1.5331, "step": 2057 }, { "epoch": 0.8902106031522885, "grad_norm": 26.610944747924805, "learning_rate": 2.213473315835521e-06, "loss": 1.4869, "step": 2058 }, { "epoch": 0.8906431641839466, "grad_norm": 24.159992218017578, "learning_rate": 2.2047244094488192e-06, "loss": 1.5627, "step": 2059 }, { "epoch": 0.8910757252156046, "grad_norm": 25.268817901611328, "learning_rate": 2.1959755030621176e-06, "loss": 1.513, "step": 2060 }, { "epoch": 0.8915082862472627, "grad_norm": 23.28951644897461, "learning_rate": 2.187226596675416e-06, "loss": 1.5356, "step": 2061 }, { "epoch": 0.8919408472789208, "grad_norm": 24.559032440185547, "learning_rate": 2.1784776902887143e-06, "loss": 1.5505, "step": 2062 }, { "epoch": 0.8923734083105789, "grad_norm": 22.86376190185547, "learning_rate": 2.1697287839020122e-06, "loss": 1.4958, "step": 2063 }, { "epoch": 0.8928059693422369, "grad_norm": 25.818510055541992, "learning_rate": 2.1609798775153106e-06, "loss": 1.5579, "step": 2064 }, { "epoch": 0.8932385303738949, "grad_norm": 23.554603576660156, "learning_rate": 2.152230971128609e-06, "loss": 1.5331, "step": 2065 }, { "epoch": 0.893671091405553, "grad_norm": 22.988496780395508, "learning_rate": 2.1434820647419073e-06, "loss": 1.4828, "step": 2066 }, { "epoch": 0.894103652437211, "grad_norm": 25.738300323486328, "learning_rate": 2.1347331583552057e-06, "loss": 1.5182, "step": 2067 }, { "epoch": 0.8945362134688691, "grad_norm": 26.744293212890625, "learning_rate": 2.125984251968504e-06, "loss": 1.4402, "step": 2068 }, { "epoch": 0.8949687745005271, "grad_norm": 26.04250144958496, "learning_rate": 2.1172353455818024e-06, "loss": 1.4826, "step": 2069 }, { "epoch": 0.8954013355321853, "grad_norm": 24.584280014038086, "learning_rate": 2.1084864391951008e-06, "loss": 1.5665, "step": 2070 }, { "epoch": 0.8958338965638433, "grad_norm": 28.59994888305664, "learning_rate": 2.099737532808399e-06, "loss": 1.4376, "step": 2071 }, { "epoch": 0.8962664575955014, "grad_norm": 24.084014892578125, "learning_rate": 2.0909886264216975e-06, "loss": 1.4553, "step": 2072 }, { "epoch": 0.8966990186271594, "grad_norm": 25.38064193725586, "learning_rate": 2.082239720034996e-06, "loss": 1.5537, "step": 2073 }, { "epoch": 0.8971315796588175, "grad_norm": 24.919994354248047, "learning_rate": 2.073490813648294e-06, "loss": 1.508, "step": 2074 }, { "epoch": 0.8975641406904755, "grad_norm": 24.744308471679688, "learning_rate": 2.0647419072615926e-06, "loss": 1.5346, "step": 2075 }, { "epoch": 0.8979967017221336, "grad_norm": 24.991436004638672, "learning_rate": 2.055993000874891e-06, "loss": 1.5086, "step": 2076 }, { "epoch": 0.8984292627537916, "grad_norm": 26.734596252441406, "learning_rate": 2.0472440944881893e-06, "loss": 1.4796, "step": 2077 }, { "epoch": 0.8988618237854498, "grad_norm": 24.347599029541016, "learning_rate": 2.0384951881014876e-06, "loss": 1.5146, "step": 2078 }, { "epoch": 0.8992943848171078, "grad_norm": 24.754878997802734, "learning_rate": 2.029746281714786e-06, "loss": 1.5116, "step": 2079 }, { "epoch": 0.8997269458487659, "grad_norm": 23.960243225097656, "learning_rate": 2.0209973753280844e-06, "loss": 1.5233, "step": 2080 }, { "epoch": 0.9001595068804239, "grad_norm": 24.335981369018555, "learning_rate": 2.0122484689413823e-06, "loss": 1.5004, "step": 2081 }, { "epoch": 0.900592067912082, "grad_norm": 25.127904891967773, "learning_rate": 2.0034995625546807e-06, "loss": 1.4853, "step": 2082 }, { "epoch": 0.90102462894374, "grad_norm": 27.572782516479492, "learning_rate": 1.994750656167979e-06, "loss": 1.4948, "step": 2083 }, { "epoch": 0.9014571899753981, "grad_norm": 25.628870010375977, "learning_rate": 1.9860017497812774e-06, "loss": 1.5031, "step": 2084 }, { "epoch": 0.9018897510070562, "grad_norm": 24.450708389282227, "learning_rate": 1.9772528433945757e-06, "loss": 1.4799, "step": 2085 }, { "epoch": 0.9023223120387142, "grad_norm": 23.836322784423828, "learning_rate": 1.968503937007874e-06, "loss": 1.571, "step": 2086 }, { "epoch": 0.9027548730703723, "grad_norm": 26.30670928955078, "learning_rate": 1.9597550306211725e-06, "loss": 1.6103, "step": 2087 }, { "epoch": 0.9031874341020303, "grad_norm": 25.850881576538086, "learning_rate": 1.951006124234471e-06, "loss": 1.5946, "step": 2088 }, { "epoch": 0.9036199951336884, "grad_norm": 21.866104125976562, "learning_rate": 1.942257217847769e-06, "loss": 1.5118, "step": 2089 }, { "epoch": 0.9040525561653464, "grad_norm": 24.595714569091797, "learning_rate": 1.9335083114610675e-06, "loss": 1.4713, "step": 2090 }, { "epoch": 0.9044851171970045, "grad_norm": 28.41547393798828, "learning_rate": 1.924759405074366e-06, "loss": 1.6027, "step": 2091 }, { "epoch": 0.9049176782286625, "grad_norm": 27.03108787536621, "learning_rate": 1.9160104986876642e-06, "loss": 1.5333, "step": 2092 }, { "epoch": 0.9053502392603207, "grad_norm": 24.61553955078125, "learning_rate": 1.9072615923009624e-06, "loss": 1.4534, "step": 2093 }, { "epoch": 0.9057828002919787, "grad_norm": 23.870311737060547, "learning_rate": 1.8985126859142608e-06, "loss": 1.5434, "step": 2094 }, { "epoch": 0.9062153613236368, "grad_norm": 23.868675231933594, "learning_rate": 1.8897637795275591e-06, "loss": 1.5228, "step": 2095 }, { "epoch": 0.9066479223552948, "grad_norm": 24.33563804626465, "learning_rate": 1.8810148731408575e-06, "loss": 1.5551, "step": 2096 }, { "epoch": 0.9070804833869529, "grad_norm": 24.130783081054688, "learning_rate": 1.872265966754156e-06, "loss": 1.5063, "step": 2097 }, { "epoch": 0.9075130444186109, "grad_norm": 22.190542221069336, "learning_rate": 1.8635170603674544e-06, "loss": 1.4745, "step": 2098 }, { "epoch": 0.907945605450269, "grad_norm": 27.16103744506836, "learning_rate": 1.8547681539807523e-06, "loss": 1.4179, "step": 2099 }, { "epoch": 0.908378166481927, "grad_norm": 23.1556339263916, "learning_rate": 1.846019247594051e-06, "loss": 1.5758, "step": 2100 }, { "epoch": 0.9088107275135852, "grad_norm": 23.74553871154785, "learning_rate": 1.8372703412073493e-06, "loss": 1.5291, "step": 2101 }, { "epoch": 0.9092432885452432, "grad_norm": 24.70638084411621, "learning_rate": 1.8285214348206476e-06, "loss": 1.5172, "step": 2102 }, { "epoch": 0.9096758495769013, "grad_norm": 28.004220962524414, "learning_rate": 1.819772528433946e-06, "loss": 1.4554, "step": 2103 }, { "epoch": 0.9101084106085593, "grad_norm": 25.194591522216797, "learning_rate": 1.8110236220472444e-06, "loss": 1.4265, "step": 2104 }, { "epoch": 0.9105409716402174, "grad_norm": 26.836763381958008, "learning_rate": 1.8022747156605425e-06, "loss": 1.4578, "step": 2105 }, { "epoch": 0.9109735326718754, "grad_norm": 22.51131248474121, "learning_rate": 1.7935258092738409e-06, "loss": 1.445, "step": 2106 }, { "epoch": 0.9114060937035335, "grad_norm": 23.413801193237305, "learning_rate": 1.7847769028871392e-06, "loss": 1.5159, "step": 2107 }, { "epoch": 0.9118386547351915, "grad_norm": 25.643709182739258, "learning_rate": 1.7760279965004376e-06, "loss": 1.4713, "step": 2108 }, { "epoch": 0.9122712157668496, "grad_norm": 26.521026611328125, "learning_rate": 1.767279090113736e-06, "loss": 1.5231, "step": 2109 }, { "epoch": 0.9127037767985077, "grad_norm": 26.597665786743164, "learning_rate": 1.7585301837270343e-06, "loss": 1.4962, "step": 2110 }, { "epoch": 0.9131363378301657, "grad_norm": 24.662185668945312, "learning_rate": 1.7497812773403325e-06, "loss": 1.4917, "step": 2111 }, { "epoch": 0.9135688988618238, "grad_norm": 25.956602096557617, "learning_rate": 1.7410323709536308e-06, "loss": 1.4278, "step": 2112 }, { "epoch": 0.9140014598934818, "grad_norm": 25.572887420654297, "learning_rate": 1.7322834645669292e-06, "loss": 1.5098, "step": 2113 }, { "epoch": 0.9144340209251399, "grad_norm": 26.01473045349121, "learning_rate": 1.7235345581802275e-06, "loss": 1.5062, "step": 2114 }, { "epoch": 0.9148665819567979, "grad_norm": 23.53647804260254, "learning_rate": 1.714785651793526e-06, "loss": 1.5157, "step": 2115 }, { "epoch": 0.915299142988456, "grad_norm": 26.578235626220703, "learning_rate": 1.7060367454068245e-06, "loss": 1.5472, "step": 2116 }, { "epoch": 0.9157317040201141, "grad_norm": 24.396869659423828, "learning_rate": 1.6972878390201224e-06, "loss": 1.5047, "step": 2117 }, { "epoch": 0.9161642650517722, "grad_norm": 21.657743453979492, "learning_rate": 1.688538932633421e-06, "loss": 1.5496, "step": 2118 }, { "epoch": 0.9165968260834302, "grad_norm": 26.7194766998291, "learning_rate": 1.6797900262467193e-06, "loss": 1.5533, "step": 2119 }, { "epoch": 0.9170293871150883, "grad_norm": 28.471969604492188, "learning_rate": 1.6710411198600177e-06, "loss": 1.5192, "step": 2120 }, { "epoch": 0.9174619481467463, "grad_norm": 25.8294620513916, "learning_rate": 1.662292213473316e-06, "loss": 1.572, "step": 2121 }, { "epoch": 0.9178945091784044, "grad_norm": 24.569931030273438, "learning_rate": 1.6535433070866144e-06, "loss": 1.4803, "step": 2122 }, { "epoch": 0.9183270702100624, "grad_norm": 28.031354904174805, "learning_rate": 1.6447944006999126e-06, "loss": 1.402, "step": 2123 }, { "epoch": 0.9187596312417206, "grad_norm": 23.158405303955078, "learning_rate": 1.636045494313211e-06, "loss": 1.5053, "step": 2124 }, { "epoch": 0.9191921922733786, "grad_norm": 24.734914779663086, "learning_rate": 1.6272965879265093e-06, "loss": 1.4882, "step": 2125 }, { "epoch": 0.9196247533050367, "grad_norm": 25.3793888092041, "learning_rate": 1.6185476815398076e-06, "loss": 1.5247, "step": 2126 }, { "epoch": 0.9200573143366947, "grad_norm": 26.39560890197754, "learning_rate": 1.609798775153106e-06, "loss": 1.5502, "step": 2127 }, { "epoch": 0.9204898753683528, "grad_norm": 25.682659149169922, "learning_rate": 1.6010498687664044e-06, "loss": 1.4543, "step": 2128 }, { "epoch": 0.9209224364000108, "grad_norm": 25.748123168945312, "learning_rate": 1.5923009623797025e-06, "loss": 1.4761, "step": 2129 }, { "epoch": 0.9213549974316688, "grad_norm": 23.998931884765625, "learning_rate": 1.5835520559930009e-06, "loss": 1.4683, "step": 2130 }, { "epoch": 0.9217875584633269, "grad_norm": 31.975799560546875, "learning_rate": 1.5748031496062992e-06, "loss": 1.5012, "step": 2131 }, { "epoch": 0.922220119494985, "grad_norm": 26.674463272094727, "learning_rate": 1.5660542432195978e-06, "loss": 1.4607, "step": 2132 }, { "epoch": 0.9226526805266431, "grad_norm": 22.74907684326172, "learning_rate": 1.5573053368328962e-06, "loss": 1.4775, "step": 2133 }, { "epoch": 0.9230852415583011, "grad_norm": 28.162485122680664, "learning_rate": 1.5485564304461945e-06, "loss": 1.4859, "step": 2134 }, { "epoch": 0.9235178025899592, "grad_norm": 25.198293685913086, "learning_rate": 1.5398075240594927e-06, "loss": 1.4851, "step": 2135 }, { "epoch": 0.9239503636216172, "grad_norm": 27.7397403717041, "learning_rate": 1.531058617672791e-06, "loss": 1.5154, "step": 2136 }, { "epoch": 0.9243829246532753, "grad_norm": 26.483898162841797, "learning_rate": 1.5223097112860894e-06, "loss": 1.5139, "step": 2137 }, { "epoch": 0.9248154856849333, "grad_norm": 23.793954849243164, "learning_rate": 1.5135608048993877e-06, "loss": 1.5656, "step": 2138 }, { "epoch": 0.9252480467165914, "grad_norm": 27.676443099975586, "learning_rate": 1.5048118985126861e-06, "loss": 1.5341, "step": 2139 }, { "epoch": 0.9256806077482495, "grad_norm": 27.942245483398438, "learning_rate": 1.4960629921259845e-06, "loss": 1.5453, "step": 2140 }, { "epoch": 0.9261131687799076, "grad_norm": 23.83132553100586, "learning_rate": 1.4873140857392826e-06, "loss": 1.5125, "step": 2141 }, { "epoch": 0.9265457298115656, "grad_norm": 25.71536636352539, "learning_rate": 1.478565179352581e-06, "loss": 1.4835, "step": 2142 }, { "epoch": 0.9269782908432237, "grad_norm": 22.844636917114258, "learning_rate": 1.4698162729658793e-06, "loss": 1.5344, "step": 2143 }, { "epoch": 0.9274108518748817, "grad_norm": 26.83110237121582, "learning_rate": 1.4610673665791777e-06, "loss": 1.4896, "step": 2144 }, { "epoch": 0.9278434129065398, "grad_norm": 23.741893768310547, "learning_rate": 1.452318460192476e-06, "loss": 1.5107, "step": 2145 }, { "epoch": 0.9282759739381978, "grad_norm": 29.269779205322266, "learning_rate": 1.4435695538057744e-06, "loss": 1.5069, "step": 2146 }, { "epoch": 0.928708534969856, "grad_norm": 24.56169319152832, "learning_rate": 1.4348206474190726e-06, "loss": 1.4771, "step": 2147 }, { "epoch": 0.929141096001514, "grad_norm": 26.20133399963379, "learning_rate": 1.426071741032371e-06, "loss": 1.5259, "step": 2148 }, { "epoch": 0.9295736570331721, "grad_norm": 25.875614166259766, "learning_rate": 1.4173228346456693e-06, "loss": 1.4957, "step": 2149 }, { "epoch": 0.9300062180648301, "grad_norm": 28.533090591430664, "learning_rate": 1.4085739282589679e-06, "loss": 1.5897, "step": 2150 }, { "epoch": 0.9304387790964882, "grad_norm": 26.25223731994629, "learning_rate": 1.3998250218722662e-06, "loss": 1.5214, "step": 2151 }, { "epoch": 0.9308713401281462, "grad_norm": 23.350339889526367, "learning_rate": 1.3910761154855646e-06, "loss": 1.5911, "step": 2152 }, { "epoch": 0.9313039011598042, "grad_norm": 25.860279083251953, "learning_rate": 1.3823272090988627e-06, "loss": 1.4472, "step": 2153 }, { "epoch": 0.9317364621914623, "grad_norm": 25.473173141479492, "learning_rate": 1.373578302712161e-06, "loss": 1.5506, "step": 2154 }, { "epoch": 0.9321690232231203, "grad_norm": 24.590129852294922, "learning_rate": 1.3648293963254594e-06, "loss": 1.5507, "step": 2155 }, { "epoch": 0.9326015842547785, "grad_norm": 26.397546768188477, "learning_rate": 1.3560804899387578e-06, "loss": 1.5377, "step": 2156 }, { "epoch": 0.9330341452864365, "grad_norm": 27.98323631286621, "learning_rate": 1.3473315835520562e-06, "loss": 1.5116, "step": 2157 }, { "epoch": 0.9334667063180946, "grad_norm": 24.400859832763672, "learning_rate": 1.3385826771653545e-06, "loss": 1.5775, "step": 2158 }, { "epoch": 0.9338992673497526, "grad_norm": 26.30158233642578, "learning_rate": 1.3298337707786527e-06, "loss": 1.4523, "step": 2159 }, { "epoch": 0.9343318283814107, "grad_norm": 23.896175384521484, "learning_rate": 1.321084864391951e-06, "loss": 1.4708, "step": 2160 }, { "epoch": 0.9347643894130687, "grad_norm": 24.987720489501953, "learning_rate": 1.3123359580052494e-06, "loss": 1.4932, "step": 2161 }, { "epoch": 0.9351969504447268, "grad_norm": 23.302629470825195, "learning_rate": 1.3035870516185478e-06, "loss": 1.463, "step": 2162 }, { "epoch": 0.9356295114763848, "grad_norm": 26.809837341308594, "learning_rate": 1.2948381452318461e-06, "loss": 1.5367, "step": 2163 }, { "epoch": 0.936062072508043, "grad_norm": 23.170120239257812, "learning_rate": 1.2860892388451447e-06, "loss": 1.4773, "step": 2164 }, { "epoch": 0.936494633539701, "grad_norm": 24.36250114440918, "learning_rate": 1.2773403324584426e-06, "loss": 1.5286, "step": 2165 }, { "epoch": 0.9369271945713591, "grad_norm": 26.31621551513672, "learning_rate": 1.268591426071741e-06, "loss": 1.4818, "step": 2166 }, { "epoch": 0.9373597556030171, "grad_norm": 27.618297576904297, "learning_rate": 1.2598425196850396e-06, "loss": 1.4788, "step": 2167 }, { "epoch": 0.9377923166346752, "grad_norm": 25.09479522705078, "learning_rate": 1.251093613298338e-06, "loss": 1.5178, "step": 2168 }, { "epoch": 0.9382248776663332, "grad_norm": 26.131237030029297, "learning_rate": 1.242344706911636e-06, "loss": 1.5872, "step": 2169 }, { "epoch": 0.9386574386979913, "grad_norm": 25.003849029541016, "learning_rate": 1.2335958005249344e-06, "loss": 1.5635, "step": 2170 }, { "epoch": 0.9390899997296493, "grad_norm": 26.515464782714844, "learning_rate": 1.2248468941382328e-06, "loss": 1.5046, "step": 2171 }, { "epoch": 0.9395225607613075, "grad_norm": 26.55370330810547, "learning_rate": 1.2160979877515311e-06, "loss": 1.5933, "step": 2172 }, { "epoch": 0.9399551217929655, "grad_norm": 24.8226375579834, "learning_rate": 1.2073490813648295e-06, "loss": 1.5164, "step": 2173 }, { "epoch": 0.9403876828246235, "grad_norm": 24.112558364868164, "learning_rate": 1.1986001749781279e-06, "loss": 1.5325, "step": 2174 }, { "epoch": 0.9408202438562816, "grad_norm": 25.355792999267578, "learning_rate": 1.189851268591426e-06, "loss": 1.5334, "step": 2175 }, { "epoch": 0.9412528048879396, "grad_norm": 26.321578979492188, "learning_rate": 1.1811023622047246e-06, "loss": 1.4804, "step": 2176 }, { "epoch": 0.9416853659195977, "grad_norm": 25.188034057617188, "learning_rate": 1.172353455818023e-06, "loss": 1.5346, "step": 2177 }, { "epoch": 0.9421179269512557, "grad_norm": 26.958105087280273, "learning_rate": 1.163604549431321e-06, "loss": 1.5058, "step": 2178 }, { "epoch": 0.9425504879829139, "grad_norm": 23.981430053710938, "learning_rate": 1.1548556430446194e-06, "loss": 1.5336, "step": 2179 }, { "epoch": 0.9429830490145719, "grad_norm": 24.413782119750977, "learning_rate": 1.1461067366579178e-06, "loss": 1.5212, "step": 2180 }, { "epoch": 0.94341561004623, "grad_norm": 25.38534164428711, "learning_rate": 1.1373578302712162e-06, "loss": 1.4876, "step": 2181 }, { "epoch": 0.943848171077888, "grad_norm": 27.410280227661133, "learning_rate": 1.1286089238845145e-06, "loss": 1.495, "step": 2182 }, { "epoch": 0.9442807321095461, "grad_norm": 24.01247787475586, "learning_rate": 1.1198600174978129e-06, "loss": 1.4992, "step": 2183 }, { "epoch": 0.9447132931412041, "grad_norm": 28.51532554626465, "learning_rate": 1.111111111111111e-06, "loss": 1.4806, "step": 2184 }, { "epoch": 0.9451458541728622, "grad_norm": 24.75975227355957, "learning_rate": 1.1023622047244096e-06, "loss": 1.5124, "step": 2185 }, { "epoch": 0.9455784152045202, "grad_norm": 24.5733585357666, "learning_rate": 1.093613298337708e-06, "loss": 1.5007, "step": 2186 }, { "epoch": 0.9460109762361784, "grad_norm": 25.291410446166992, "learning_rate": 1.0848643919510061e-06, "loss": 1.4037, "step": 2187 }, { "epoch": 0.9464435372678364, "grad_norm": 22.40592384338379, "learning_rate": 1.0761154855643045e-06, "loss": 1.506, "step": 2188 }, { "epoch": 0.9468760982994945, "grad_norm": 25.94204330444336, "learning_rate": 1.0673665791776028e-06, "loss": 1.4871, "step": 2189 }, { "epoch": 0.9473086593311525, "grad_norm": 23.761117935180664, "learning_rate": 1.0586176727909012e-06, "loss": 1.5323, "step": 2190 }, { "epoch": 0.9477412203628106, "grad_norm": 25.79743766784668, "learning_rate": 1.0498687664041996e-06, "loss": 1.5, "step": 2191 }, { "epoch": 0.9481737813944686, "grad_norm": 28.8542423248291, "learning_rate": 1.041119860017498e-06, "loss": 1.4627, "step": 2192 }, { "epoch": 0.9486063424261267, "grad_norm": 24.279600143432617, "learning_rate": 1.0323709536307963e-06, "loss": 1.5207, "step": 2193 }, { "epoch": 0.9490389034577847, "grad_norm": 32.73613357543945, "learning_rate": 1.0236220472440946e-06, "loss": 1.5151, "step": 2194 }, { "epoch": 0.9494714644894429, "grad_norm": 25.603696823120117, "learning_rate": 1.014873140857393e-06, "loss": 1.4946, "step": 2195 }, { "epoch": 0.9499040255211009, "grad_norm": 29.79012680053711, "learning_rate": 1.0061242344706911e-06, "loss": 1.5321, "step": 2196 }, { "epoch": 0.9503365865527589, "grad_norm": 25.095794677734375, "learning_rate": 9.973753280839895e-07, "loss": 1.5347, "step": 2197 }, { "epoch": 0.950769147584417, "grad_norm": 24.920623779296875, "learning_rate": 9.886264216972879e-07, "loss": 1.4609, "step": 2198 }, { "epoch": 0.951201708616075, "grad_norm": 24.81159019470215, "learning_rate": 9.798775153105862e-07, "loss": 1.4245, "step": 2199 }, { "epoch": 0.9516342696477331, "grad_norm": 25.507144927978516, "learning_rate": 9.711286089238846e-07, "loss": 1.5234, "step": 2200 }, { "epoch": 0.9520668306793911, "grad_norm": 25.74333381652832, "learning_rate": 9.62379702537183e-07, "loss": 1.4334, "step": 2201 }, { "epoch": 0.9524993917110492, "grad_norm": 24.9305477142334, "learning_rate": 9.536307961504812e-07, "loss": 1.4945, "step": 2202 }, { "epoch": 0.9529319527427073, "grad_norm": 25.96180534362793, "learning_rate": 9.448818897637796e-07, "loss": 1.4937, "step": 2203 }, { "epoch": 0.9533645137743654, "grad_norm": 27.482013702392578, "learning_rate": 9.36132983377078e-07, "loss": 1.5735, "step": 2204 }, { "epoch": 0.9537970748060234, "grad_norm": 27.310945510864258, "learning_rate": 9.273840769903762e-07, "loss": 1.6501, "step": 2205 }, { "epoch": 0.9542296358376815, "grad_norm": 23.459266662597656, "learning_rate": 9.186351706036746e-07, "loss": 1.4727, "step": 2206 }, { "epoch": 0.9546621968693395, "grad_norm": 23.028839111328125, "learning_rate": 9.09886264216973e-07, "loss": 1.5571, "step": 2207 }, { "epoch": 0.9550947579009976, "grad_norm": 24.47319984436035, "learning_rate": 9.011373578302713e-07, "loss": 1.4652, "step": 2208 }, { "epoch": 0.9555273189326556, "grad_norm": 27.27988052368164, "learning_rate": 8.923884514435696e-07, "loss": 1.6036, "step": 2209 }, { "epoch": 0.9559598799643138, "grad_norm": 27.1883544921875, "learning_rate": 8.83639545056868e-07, "loss": 1.4815, "step": 2210 }, { "epoch": 0.9563924409959718, "grad_norm": 27.201946258544922, "learning_rate": 8.748906386701662e-07, "loss": 1.4541, "step": 2211 }, { "epoch": 0.9568250020276299, "grad_norm": 27.259296417236328, "learning_rate": 8.661417322834646e-07, "loss": 1.5087, "step": 2212 }, { "epoch": 0.9572575630592879, "grad_norm": 25.546985626220703, "learning_rate": 8.57392825896763e-07, "loss": 1.5417, "step": 2213 }, { "epoch": 0.957690124090946, "grad_norm": 26.166793823242188, "learning_rate": 8.486439195100612e-07, "loss": 1.4723, "step": 2214 }, { "epoch": 0.958122685122604, "grad_norm": 26.597272872924805, "learning_rate": 8.398950131233597e-07, "loss": 1.5238, "step": 2215 }, { "epoch": 0.9585552461542621, "grad_norm": 24.605998992919922, "learning_rate": 8.31146106736658e-07, "loss": 1.5571, "step": 2216 }, { "epoch": 0.9589878071859201, "grad_norm": 24.375625610351562, "learning_rate": 8.223972003499563e-07, "loss": 1.4745, "step": 2217 }, { "epoch": 0.9594203682175781, "grad_norm": 26.005847930908203, "learning_rate": 8.136482939632546e-07, "loss": 1.5447, "step": 2218 }, { "epoch": 0.9598529292492363, "grad_norm": 27.76963996887207, "learning_rate": 8.04899387576553e-07, "loss": 1.5089, "step": 2219 }, { "epoch": 0.9602854902808943, "grad_norm": 25.300994873046875, "learning_rate": 7.961504811898513e-07, "loss": 1.5734, "step": 2220 }, { "epoch": 0.9607180513125524, "grad_norm": 26.90599822998047, "learning_rate": 7.874015748031496e-07, "loss": 1.5282, "step": 2221 }, { "epoch": 0.9611506123442104, "grad_norm": 25.654613494873047, "learning_rate": 7.786526684164481e-07, "loss": 1.5522, "step": 2222 }, { "epoch": 0.9615831733758685, "grad_norm": 23.53044319152832, "learning_rate": 7.699037620297463e-07, "loss": 1.575, "step": 2223 }, { "epoch": 0.9620157344075265, "grad_norm": 25.62672996520996, "learning_rate": 7.611548556430447e-07, "loss": 1.5661, "step": 2224 }, { "epoch": 0.9624482954391846, "grad_norm": 24.500530242919922, "learning_rate": 7.524059492563431e-07, "loss": 1.5151, "step": 2225 }, { "epoch": 0.9628808564708426, "grad_norm": 25.327373504638672, "learning_rate": 7.436570428696413e-07, "loss": 1.5006, "step": 2226 }, { "epoch": 0.9633134175025008, "grad_norm": 28.519956588745117, "learning_rate": 7.349081364829397e-07, "loss": 1.5537, "step": 2227 }, { "epoch": 0.9637459785341588, "grad_norm": 25.847875595092773, "learning_rate": 7.26159230096238e-07, "loss": 1.4917, "step": 2228 }, { "epoch": 0.9641785395658169, "grad_norm": 22.345125198364258, "learning_rate": 7.174103237095363e-07, "loss": 1.4354, "step": 2229 }, { "epoch": 0.9646111005974749, "grad_norm": 24.792699813842773, "learning_rate": 7.086614173228346e-07, "loss": 1.4301, "step": 2230 }, { "epoch": 0.965043661629133, "grad_norm": 25.965551376342773, "learning_rate": 6.999125109361331e-07, "loss": 1.5495, "step": 2231 }, { "epoch": 0.965476222660791, "grad_norm": 26.518917083740234, "learning_rate": 6.911636045494314e-07, "loss": 1.5347, "step": 2232 }, { "epoch": 0.9659087836924491, "grad_norm": 22.920303344726562, "learning_rate": 6.824146981627297e-07, "loss": 1.4384, "step": 2233 }, { "epoch": 0.9663413447241072, "grad_norm": 23.388620376586914, "learning_rate": 6.736657917760281e-07, "loss": 1.4741, "step": 2234 }, { "epoch": 0.9667739057557653, "grad_norm": 22.90789222717285, "learning_rate": 6.649168853893263e-07, "loss": 1.4389, "step": 2235 }, { "epoch": 0.9672064667874233, "grad_norm": 27.07576560974121, "learning_rate": 6.561679790026247e-07, "loss": 1.5521, "step": 2236 }, { "epoch": 0.9676390278190814, "grad_norm": 25.001564025878906, "learning_rate": 6.474190726159231e-07, "loss": 1.5137, "step": 2237 }, { "epoch": 0.9680715888507394, "grad_norm": 23.1292724609375, "learning_rate": 6.386701662292213e-07, "loss": 1.4152, "step": 2238 }, { "epoch": 0.9685041498823975, "grad_norm": 24.822486877441406, "learning_rate": 6.299212598425198e-07, "loss": 1.4223, "step": 2239 }, { "epoch": 0.9689367109140555, "grad_norm": 23.44011116027832, "learning_rate": 6.21172353455818e-07, "loss": 1.5192, "step": 2240 }, { "epoch": 0.9693692719457135, "grad_norm": 25.917972564697266, "learning_rate": 6.124234470691164e-07, "loss": 1.5659, "step": 2241 }, { "epoch": 0.9698018329773717, "grad_norm": 24.592409133911133, "learning_rate": 6.036745406824148e-07, "loss": 1.5472, "step": 2242 }, { "epoch": 0.9702343940090297, "grad_norm": 25.710756301879883, "learning_rate": 5.94925634295713e-07, "loss": 1.4849, "step": 2243 }, { "epoch": 0.9706669550406878, "grad_norm": 30.253639221191406, "learning_rate": 5.861767279090115e-07, "loss": 1.5312, "step": 2244 }, { "epoch": 0.9710995160723458, "grad_norm": 27.50519371032715, "learning_rate": 5.774278215223097e-07, "loss": 1.54, "step": 2245 }, { "epoch": 0.9715320771040039, "grad_norm": 26.179685592651367, "learning_rate": 5.686789151356081e-07, "loss": 1.5439, "step": 2246 }, { "epoch": 0.9719646381356619, "grad_norm": 24.04256248474121, "learning_rate": 5.599300087489064e-07, "loss": 1.5165, "step": 2247 }, { "epoch": 0.97239719916732, "grad_norm": 25.094362258911133, "learning_rate": 5.511811023622048e-07, "loss": 1.5274, "step": 2248 }, { "epoch": 0.972829760198978, "grad_norm": 22.677230834960938, "learning_rate": 5.424321959755031e-07, "loss": 1.4912, "step": 2249 }, { "epoch": 0.9732623212306362, "grad_norm": 27.18683624267578, "learning_rate": 5.336832895888014e-07, "loss": 1.4782, "step": 2250 }, { "epoch": 0.9736948822622942, "grad_norm": 25.23105239868164, "learning_rate": 5.249343832020998e-07, "loss": 1.5043, "step": 2251 }, { "epoch": 0.9741274432939523, "grad_norm": 24.919349670410156, "learning_rate": 5.161854768153981e-07, "loss": 1.5297, "step": 2252 }, { "epoch": 0.9745600043256103, "grad_norm": 26.34563636779785, "learning_rate": 5.074365704286965e-07, "loss": 1.4319, "step": 2253 }, { "epoch": 0.9749925653572684, "grad_norm": 24.52495002746582, "learning_rate": 4.986876640419948e-07, "loss": 1.6465, "step": 2254 }, { "epoch": 0.9754251263889264, "grad_norm": 24.07614517211914, "learning_rate": 4.899387576552931e-07, "loss": 1.6281, "step": 2255 }, { "epoch": 0.9758576874205845, "grad_norm": 26.345617294311523, "learning_rate": 4.811898512685915e-07, "loss": 1.5466, "step": 2256 }, { "epoch": 0.9762902484522425, "grad_norm": 27.2408390045166, "learning_rate": 4.724409448818898e-07, "loss": 1.5557, "step": 2257 }, { "epoch": 0.9767228094839007, "grad_norm": 23.67082977294922, "learning_rate": 4.636920384951881e-07, "loss": 1.5431, "step": 2258 }, { "epoch": 0.9771553705155587, "grad_norm": 23.821247100830078, "learning_rate": 4.549431321084865e-07, "loss": 1.5224, "step": 2259 }, { "epoch": 0.9775879315472168, "grad_norm": 24.50645637512207, "learning_rate": 4.461942257217848e-07, "loss": 1.4193, "step": 2260 }, { "epoch": 0.9780204925788748, "grad_norm": 24.018909454345703, "learning_rate": 4.374453193350831e-07, "loss": 1.5108, "step": 2261 }, { "epoch": 0.9784530536105328, "grad_norm": 24.982769012451172, "learning_rate": 4.286964129483815e-07, "loss": 1.4694, "step": 2262 }, { "epoch": 0.9788856146421909, "grad_norm": 26.493566513061523, "learning_rate": 4.1994750656167983e-07, "loss": 1.4441, "step": 2263 }, { "epoch": 0.9793181756738489, "grad_norm": 22.410762786865234, "learning_rate": 4.1119860017497814e-07, "loss": 1.5065, "step": 2264 }, { "epoch": 0.979750736705507, "grad_norm": 24.40163230895996, "learning_rate": 4.024496937882765e-07, "loss": 1.5389, "step": 2265 }, { "epoch": 0.9801832977371651, "grad_norm": 26.319671630859375, "learning_rate": 3.937007874015748e-07, "loss": 1.4334, "step": 2266 }, { "epoch": 0.9806158587688232, "grad_norm": 22.440717697143555, "learning_rate": 3.8495188101487317e-07, "loss": 1.5234, "step": 2267 }, { "epoch": 0.9810484198004812, "grad_norm": 23.569488525390625, "learning_rate": 3.7620297462817153e-07, "loss": 1.4278, "step": 2268 }, { "epoch": 0.9814809808321393, "grad_norm": 24.376298904418945, "learning_rate": 3.6745406824146983e-07, "loss": 1.5538, "step": 2269 }, { "epoch": 0.9819135418637973, "grad_norm": 25.128238677978516, "learning_rate": 3.5870516185476814e-07, "loss": 1.5044, "step": 2270 }, { "epoch": 0.9823461028954554, "grad_norm": 24.355140686035156, "learning_rate": 3.4995625546806655e-07, "loss": 1.4869, "step": 2271 }, { "epoch": 0.9827786639271134, "grad_norm": 26.226696014404297, "learning_rate": 3.4120734908136486e-07, "loss": 1.4047, "step": 2272 }, { "epoch": 0.9832112249587716, "grad_norm": 24.68053436279297, "learning_rate": 3.3245844269466317e-07, "loss": 1.4378, "step": 2273 }, { "epoch": 0.9836437859904296, "grad_norm": 23.87020492553711, "learning_rate": 3.2370953630796153e-07, "loss": 1.4567, "step": 2274 }, { "epoch": 0.9840763470220877, "grad_norm": 26.111167907714844, "learning_rate": 3.149606299212599e-07, "loss": 1.4861, "step": 2275 }, { "epoch": 0.9845089080537457, "grad_norm": 24.464523315429688, "learning_rate": 3.062117235345582e-07, "loss": 1.4386, "step": 2276 }, { "epoch": 0.9849414690854038, "grad_norm": 25.67591094970703, "learning_rate": 2.974628171478565e-07, "loss": 1.4829, "step": 2277 }, { "epoch": 0.9853740301170618, "grad_norm": 25.558279037475586, "learning_rate": 2.8871391076115486e-07, "loss": 1.487, "step": 2278 }, { "epoch": 0.9858065911487199, "grad_norm": 27.101078033447266, "learning_rate": 2.799650043744532e-07, "loss": 1.5293, "step": 2279 }, { "epoch": 0.9862391521803779, "grad_norm": 23.139122009277344, "learning_rate": 2.7121609798775153e-07, "loss": 1.491, "step": 2280 }, { "epoch": 0.9866717132120361, "grad_norm": 24.572038650512695, "learning_rate": 2.624671916010499e-07, "loss": 1.4598, "step": 2281 }, { "epoch": 0.9871042742436941, "grad_norm": 24.457639694213867, "learning_rate": 2.5371828521434825e-07, "loss": 1.5327, "step": 2282 }, { "epoch": 0.9875368352753522, "grad_norm": 26.919748306274414, "learning_rate": 2.4496937882764656e-07, "loss": 1.5191, "step": 2283 }, { "epoch": 0.9879693963070102, "grad_norm": 24.509489059448242, "learning_rate": 2.362204724409449e-07, "loss": 1.5107, "step": 2284 }, { "epoch": 0.9884019573386682, "grad_norm": 24.133081436157227, "learning_rate": 2.2747156605424325e-07, "loss": 1.4597, "step": 2285 }, { "epoch": 0.9888345183703263, "grad_norm": 27.456026077270508, "learning_rate": 2.1872265966754156e-07, "loss": 1.5723, "step": 2286 }, { "epoch": 0.9892670794019843, "grad_norm": 25.23147964477539, "learning_rate": 2.0997375328083992e-07, "loss": 1.472, "step": 2287 }, { "epoch": 0.9896996404336424, "grad_norm": 25.515771865844727, "learning_rate": 2.0122484689413825e-07, "loss": 1.5353, "step": 2288 }, { "epoch": 0.9901322014653005, "grad_norm": 25.060243606567383, "learning_rate": 1.9247594050743658e-07, "loss": 1.4872, "step": 2289 }, { "epoch": 0.9905647624969586, "grad_norm": 25.204627990722656, "learning_rate": 1.8372703412073492e-07, "loss": 1.5119, "step": 2290 }, { "epoch": 0.9909973235286166, "grad_norm": 26.539810180664062, "learning_rate": 1.7497812773403328e-07, "loss": 1.5112, "step": 2291 }, { "epoch": 0.9914298845602747, "grad_norm": 25.467899322509766, "learning_rate": 1.6622922134733158e-07, "loss": 1.4897, "step": 2292 }, { "epoch": 0.9918624455919327, "grad_norm": 26.030517578125, "learning_rate": 1.5748031496062994e-07, "loss": 1.5333, "step": 2293 }, { "epoch": 0.9922950066235908, "grad_norm": 26.485551834106445, "learning_rate": 1.4873140857392825e-07, "loss": 1.5446, "step": 2294 }, { "epoch": 0.9927275676552488, "grad_norm": 26.391727447509766, "learning_rate": 1.399825021872266e-07, "loss": 1.4711, "step": 2295 }, { "epoch": 0.993160128686907, "grad_norm": 23.88796615600586, "learning_rate": 1.3123359580052494e-07, "loss": 1.4731, "step": 2296 }, { "epoch": 0.993592689718565, "grad_norm": 24.405637741088867, "learning_rate": 1.2248468941382328e-07, "loss": 1.5611, "step": 2297 }, { "epoch": 0.9940252507502231, "grad_norm": 26.79961585998535, "learning_rate": 1.1373578302712162e-07, "loss": 1.547, "step": 2298 }, { "epoch": 0.9944578117818811, "grad_norm": 27.189603805541992, "learning_rate": 1.0498687664041996e-07, "loss": 1.5443, "step": 2299 }, { "epoch": 0.9948903728135392, "grad_norm": 27.315834045410156, "learning_rate": 9.623797025371829e-08, "loss": 1.3623, "step": 2300 }, { "epoch": 0.9953229338451972, "grad_norm": 27.901859283447266, "learning_rate": 8.748906386701664e-08, "loss": 1.6198, "step": 2301 }, { "epoch": 0.9957554948768553, "grad_norm": 26.05302619934082, "learning_rate": 7.874015748031497e-08, "loss": 1.4631, "step": 2302 }, { "epoch": 0.9961880559085133, "grad_norm": 25.083433151245117, "learning_rate": 6.99912510936133e-08, "loss": 1.5496, "step": 2303 }, { "epoch": 0.9966206169401715, "grad_norm": 25.853899002075195, "learning_rate": 6.124234470691164e-08, "loss": 1.5728, "step": 2304 }, { "epoch": 0.9970531779718295, "grad_norm": 24.412639617919922, "learning_rate": 5.249343832020998e-08, "loss": 1.5394, "step": 2305 }, { "epoch": 0.9974857390034875, "grad_norm": 26.720706939697266, "learning_rate": 4.374453193350832e-08, "loss": 1.4967, "step": 2306 }, { "epoch": 0.9979183000351456, "grad_norm": 27.0684814453125, "learning_rate": 3.499562554680665e-08, "loss": 1.5429, "step": 2307 }, { "epoch": 0.9983508610668036, "grad_norm": 24.51413917541504, "learning_rate": 2.624671916010499e-08, "loss": 1.5263, "step": 2308 }, { "epoch": 0.9987834220984617, "grad_norm": 27.829641342163086, "learning_rate": 1.7497812773403326e-08, "loss": 1.492, "step": 2309 }, { "epoch": 0.9992159831301197, "grad_norm": 24.01323127746582, "learning_rate": 8.748906386701663e-09, "loss": 1.5018, "step": 2310 }, { "epoch": 0.9996485441617778, "grad_norm": 24.6170597076416, "learning_rate": 0.0, "loss": 1.533, "step": 2311 } ], "logging_steps": 1, "max_steps": 2311, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.868910410465792e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }