{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9994323557237466, "eval_steps": 500, "global_step": 3963, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0007568590350047304, "grad_norm": 123.05032348632812, "learning_rate": 0.0, "loss": 2.0667, "step": 1 }, { "epoch": 0.0015137180700094607, "grad_norm": 59.42024612426758, "learning_rate": 3.5436764027111585e-06, "loss": 1.9891, "step": 2 }, { "epoch": 0.002270577105014191, "grad_norm": 74.41653442382812, "learning_rate": 5.61659421298763e-06, "loss": 1.9745, "step": 3 }, { "epoch": 0.0030274361400189215, "grad_norm": 65.0828857421875, "learning_rate": 7.087352805422317e-06, "loss": 1.9442, "step": 4 }, { "epoch": 0.003784295175023652, "grad_norm": 77.46288299560547, "learning_rate": 8.228161798644422e-06, "loss": 1.911, "step": 5 }, { "epoch": 0.004541154210028382, "grad_norm": 140.7876434326172, "learning_rate": 9.160270615698787e-06, "loss": 1.9, "step": 6 }, { "epoch": 0.005298013245033113, "grad_norm": 39.34813690185547, "learning_rate": 9.948357391330555e-06, "loss": 1.8421, "step": 7 }, { "epoch": 0.006054872280037843, "grad_norm": 42.30311584472656, "learning_rate": 1.0631029208133474e-05, "loss": 1.8634, "step": 8 }, { "epoch": 0.006811731315042573, "grad_norm": 56.799530029296875, "learning_rate": 1.123318842597526e-05, "loss": 1.8391, "step": 9 }, { "epoch": 0.007568590350047304, "grad_norm": 74.55519104003906, "learning_rate": 1.1771838201355582e-05, "loss": 1.7531, "step": 10 }, { "epoch": 0.008325449385052034, "grad_norm": 31.099952697753906, "learning_rate": 1.2259106193757859e-05, "loss": 1.7675, "step": 11 }, { "epoch": 0.009082308420056764, "grad_norm": 49.344966888427734, "learning_rate": 1.2703947018409945e-05, "loss": 1.7896, "step": 12 }, { "epoch": 0.009839167455061495, "grad_norm": 48.00835418701172, "learning_rate": 1.311316090883172e-05, "loss": 1.8585, "step": 13 }, { "epoch": 0.010596026490066225, "grad_norm": 38.080078125, "learning_rate": 1.3492033794041713e-05, "loss": 1.7329, "step": 14 }, { "epoch": 0.011352885525070956, "grad_norm": 45.796382904052734, "learning_rate": 1.384475601163205e-05, "loss": 1.8033, "step": 15 }, { "epoch": 0.012109744560075686, "grad_norm": 35.87776184082031, "learning_rate": 1.4174705610844634e-05, "loss": 1.7784, "step": 16 }, { "epoch": 0.012866603595080416, "grad_norm": 25.678325653076172, "learning_rate": 1.4484645617497535e-05, "loss": 1.7741, "step": 17 }, { "epoch": 0.013623462630085147, "grad_norm": 28.66301918029785, "learning_rate": 1.4776864828686414e-05, "loss": 1.7957, "step": 18 }, { "epoch": 0.014380321665089877, "grad_norm": 37.723976135253906, "learning_rate": 1.505328048981752e-05, "loss": 1.695, "step": 19 }, { "epoch": 0.015137180700094607, "grad_norm": 36.247718811035156, "learning_rate": 1.5315514604066738e-05, "loss": 1.7189, "step": 20 }, { "epoch": 0.015894039735099338, "grad_norm": 23.032033920288086, "learning_rate": 1.5564951604318184e-05, "loss": 1.6817, "step": 21 }, { "epoch": 0.016650898770104068, "grad_norm": 28.1435489654541, "learning_rate": 1.580278259646902e-05, "loss": 1.7185, "step": 22 }, { "epoch": 0.0174077578051088, "grad_norm": 75.195068359375, "learning_rate": 1.603003975988117e-05, "loss": 1.7109, "step": 23 }, { "epoch": 0.01816461684011353, "grad_norm": 30.104032516479492, "learning_rate": 1.6247623421121105e-05, "loss": 1.6333, "step": 24 }, { "epoch": 0.01892147587511826, "grad_norm": 24.25992774963379, "learning_rate": 1.6456323597288844e-05, "loss": 1.6416, "step": 25 }, { "epoch": 0.01967833491012299, "grad_norm": 28.712949752807617, "learning_rate": 1.6656837311542876e-05, "loss": 1.6712, "step": 26 }, { "epoch": 0.02043519394512772, "grad_norm": 26.72446060180664, "learning_rate": 1.6849782638962885e-05, "loss": 1.5939, "step": 27 }, { "epoch": 0.02119205298013245, "grad_norm": 20.644784927368164, "learning_rate": 1.7035710196752873e-05, "loss": 1.6718, "step": 28 }, { "epoch": 0.02194891201513718, "grad_norm": 24.643821716308594, "learning_rate": 1.7215112617252848e-05, "loss": 1.6778, "step": 29 }, { "epoch": 0.02270577105014191, "grad_norm": 22.256315231323242, "learning_rate": 1.738843241434321e-05, "loss": 1.6233, "step": 30 }, { "epoch": 0.02346263008514664, "grad_norm": 24.24241065979004, "learning_rate": 1.7556068559516658e-05, "loss": 1.6744, "step": 31 }, { "epoch": 0.024219489120151372, "grad_norm": 37.310150146484375, "learning_rate": 1.7718382013555794e-05, "loss": 1.6556, "step": 32 }, { "epoch": 0.024976348155156102, "grad_norm": 48.23684310913086, "learning_rate": 1.7875700406745488e-05, "loss": 1.6575, "step": 33 }, { "epoch": 0.025733207190160833, "grad_norm": 24.166748046875, "learning_rate": 1.8028322020208693e-05, "loss": 1.6946, "step": 34 }, { "epoch": 0.026490066225165563, "grad_norm": 29.902538299560547, "learning_rate": 1.817651918997498e-05, "loss": 1.6453, "step": 35 }, { "epoch": 0.027246925260170293, "grad_norm": 27.081722259521484, "learning_rate": 1.8320541231397574e-05, "loss": 1.5933, "step": 36 }, { "epoch": 0.028003784295175024, "grad_norm": 28.04783058166504, "learning_rate": 1.8460616962803535e-05, "loss": 1.676, "step": 37 }, { "epoch": 0.028760643330179754, "grad_norm": 49.34148406982422, "learning_rate": 1.859695689252868e-05, "loss": 1.7431, "step": 38 }, { "epoch": 0.029517502365184484, "grad_norm": 32.92803192138672, "learning_rate": 1.872975512181935e-05, "loss": 1.7004, "step": 39 }, { "epoch": 0.030274361400189215, "grad_norm": 28.5103816986084, "learning_rate": 1.8859191006777896e-05, "loss": 1.646, "step": 40 }, { "epoch": 0.031031220435193945, "grad_norm": 16.540956497192383, "learning_rate": 1.8985430615062968e-05, "loss": 1.6601, "step": 41 }, { "epoch": 0.031788079470198675, "grad_norm": 26.588886260986328, "learning_rate": 1.9108628007029345e-05, "loss": 1.6273, "step": 42 }, { "epoch": 0.03254493850520341, "grad_norm": 42.97163009643555, "learning_rate": 1.9228926366101076e-05, "loss": 1.5573, "step": 43 }, { "epoch": 0.033301797540208136, "grad_norm": 40.343658447265625, "learning_rate": 1.9346458999180177e-05, "loss": 1.5821, "step": 44 }, { "epoch": 0.03405865657521287, "grad_norm": 41.81525421142578, "learning_rate": 1.946135022461968e-05, "loss": 1.5927, "step": 45 }, { "epoch": 0.0348155156102176, "grad_norm": 24.463436126708984, "learning_rate": 1.9573716162592327e-05, "loss": 1.6377, "step": 46 }, { "epoch": 0.03557237464522233, "grad_norm": 21.16547203063965, "learning_rate": 1.9683665440452515e-05, "loss": 1.6151, "step": 47 }, { "epoch": 0.03632923368022706, "grad_norm": 75.09718322753906, "learning_rate": 1.9791299823832263e-05, "loss": 1.6261, "step": 48 }, { "epoch": 0.03708609271523179, "grad_norm": 30.04339027404785, "learning_rate": 1.989671478266111e-05, "loss": 1.6229, "step": 49 }, { "epoch": 0.03784295175023652, "grad_norm": 100.00825500488281, "learning_rate": 2e-05, "loss": 1.6116, "step": 50 }, { "epoch": 0.03859981078524125, "grad_norm": 28.68238639831543, "learning_rate": 1.9999996777398546e-05, "loss": 1.6644, "step": 51 }, { "epoch": 0.03935666982024598, "grad_norm": 68.90675354003906, "learning_rate": 1.9999987109596254e-05, "loss": 1.6159, "step": 52 }, { "epoch": 0.04011352885525071, "grad_norm": 81.32110595703125, "learning_rate": 1.999997099659936e-05, "loss": 1.7117, "step": 53 }, { "epoch": 0.04087038789025544, "grad_norm": 171.38938903808594, "learning_rate": 1.999994843841825e-05, "loss": 1.5922, "step": 54 }, { "epoch": 0.041627246925260174, "grad_norm": 43.32768249511719, "learning_rate": 1.9999919435067465e-05, "loss": 1.6245, "step": 55 }, { "epoch": 0.0423841059602649, "grad_norm": 74.8743896484375, "learning_rate": 1.9999883986565696e-05, "loss": 1.6613, "step": 56 }, { "epoch": 0.043140964995269634, "grad_norm": 18.4515438079834, "learning_rate": 1.9999842092935797e-05, "loss": 1.6048, "step": 57 }, { "epoch": 0.04389782403027436, "grad_norm": 22.151155471801758, "learning_rate": 1.999979375420477e-05, "loss": 1.632, "step": 58 }, { "epoch": 0.044654683065279095, "grad_norm": 19.156835556030273, "learning_rate": 1.9999738970403775e-05, "loss": 1.6189, "step": 59 }, { "epoch": 0.04541154210028382, "grad_norm": 18.256269454956055, "learning_rate": 1.999967774156812e-05, "loss": 1.6306, "step": 60 }, { "epoch": 0.046168401135288556, "grad_norm": 26.946420669555664, "learning_rate": 1.999961006773728e-05, "loss": 1.6411, "step": 61 }, { "epoch": 0.04692526017029328, "grad_norm": 23.500892639160156, "learning_rate": 1.999953594895487e-05, "loss": 1.5876, "step": 62 }, { "epoch": 0.04768211920529802, "grad_norm": 12.633379936218262, "learning_rate": 1.999945538526867e-05, "loss": 1.601, "step": 63 }, { "epoch": 0.048438978240302744, "grad_norm": 15.04751205444336, "learning_rate": 1.999936837673061e-05, "loss": 1.6078, "step": 64 }, { "epoch": 0.04919583727530748, "grad_norm": 11.406414985656738, "learning_rate": 1.999927492339677e-05, "loss": 1.5959, "step": 65 }, { "epoch": 0.049952696310312204, "grad_norm": 11.040087699890137, "learning_rate": 1.9999175025327395e-05, "loss": 1.6025, "step": 66 }, { "epoch": 0.05070955534531694, "grad_norm": 10.689179420471191, "learning_rate": 1.999906868258687e-05, "loss": 1.5797, "step": 67 }, { "epoch": 0.051466414380321665, "grad_norm": 8.19336986541748, "learning_rate": 1.9998955895243748e-05, "loss": 1.5564, "step": 68 }, { "epoch": 0.0522232734153264, "grad_norm": 13.38058853149414, "learning_rate": 1.9998836663370726e-05, "loss": 1.5584, "step": 69 }, { "epoch": 0.052980132450331126, "grad_norm": 7.053563594818115, "learning_rate": 1.9998710987044664e-05, "loss": 1.5005, "step": 70 }, { "epoch": 0.05373699148533586, "grad_norm": 6.7353105545043945, "learning_rate": 1.9998578866346564e-05, "loss": 1.5306, "step": 71 }, { "epoch": 0.054493850520340587, "grad_norm": 5.975197792053223, "learning_rate": 1.9998440301361598e-05, "loss": 1.5885, "step": 72 }, { "epoch": 0.05525070955534532, "grad_norm": 6.6494011878967285, "learning_rate": 1.9998295292179073e-05, "loss": 1.532, "step": 73 }, { "epoch": 0.05600756859035005, "grad_norm": 5.434142589569092, "learning_rate": 1.9998143838892468e-05, "loss": 1.5764, "step": 74 }, { "epoch": 0.05676442762535478, "grad_norm": 6.099053859710693, "learning_rate": 1.99979859415994e-05, "loss": 1.5187, "step": 75 }, { "epoch": 0.05752128666035951, "grad_norm": 5.7470855712890625, "learning_rate": 1.999782160040166e-05, "loss": 1.5377, "step": 76 }, { "epoch": 0.05827814569536424, "grad_norm": 5.577144145965576, "learning_rate": 1.9997650815405167e-05, "loss": 1.4817, "step": 77 }, { "epoch": 0.05903500473036897, "grad_norm": 5.294313907623291, "learning_rate": 1.999747358672001e-05, "loss": 1.4812, "step": 78 }, { "epoch": 0.0597918637653737, "grad_norm": 5.860252380371094, "learning_rate": 1.9997289914460428e-05, "loss": 1.524, "step": 79 }, { "epoch": 0.06054872280037843, "grad_norm": 9.259795188903809, "learning_rate": 1.9997099798744815e-05, "loss": 1.4869, "step": 80 }, { "epoch": 0.06130558183538316, "grad_norm": 5.9855852127075195, "learning_rate": 1.999690323969571e-05, "loss": 1.5187, "step": 81 }, { "epoch": 0.06206244087038789, "grad_norm": 6.138685703277588, "learning_rate": 1.9996700237439823e-05, "loss": 1.5468, "step": 82 }, { "epoch": 0.06281929990539262, "grad_norm": 6.596303939819336, "learning_rate": 1.9996490792107997e-05, "loss": 1.4899, "step": 83 }, { "epoch": 0.06357615894039735, "grad_norm": 5.794029712677002, "learning_rate": 1.9996274903835247e-05, "loss": 1.4633, "step": 84 }, { "epoch": 0.06433301797540208, "grad_norm": 5.815922260284424, "learning_rate": 1.9996052572760723e-05, "loss": 1.4968, "step": 85 }, { "epoch": 0.06508987701040682, "grad_norm": 11.239161491394043, "learning_rate": 1.9995823799027737e-05, "loss": 1.4604, "step": 86 }, { "epoch": 0.06584673604541154, "grad_norm": 5.367482662200928, "learning_rate": 1.9995588582783753e-05, "loss": 1.518, "step": 87 }, { "epoch": 0.06660359508041627, "grad_norm": 19.351980209350586, "learning_rate": 1.9995346924180394e-05, "loss": 1.5267, "step": 88 }, { "epoch": 0.067360454115421, "grad_norm": 6.7129316329956055, "learning_rate": 1.999509882337342e-05, "loss": 1.4639, "step": 89 }, { "epoch": 0.06811731315042574, "grad_norm": 6.777989387512207, "learning_rate": 1.999484428052276e-05, "loss": 1.5127, "step": 90 }, { "epoch": 0.06887417218543046, "grad_norm": 14.996123313903809, "learning_rate": 1.9994583295792487e-05, "loss": 1.5277, "step": 91 }, { "epoch": 0.0696310312204352, "grad_norm": 6.6374311447143555, "learning_rate": 1.9994315869350826e-05, "loss": 1.4834, "step": 92 }, { "epoch": 0.07038789025543993, "grad_norm": 11.19003963470459, "learning_rate": 1.9994042001370154e-05, "loss": 1.5084, "step": 93 }, { "epoch": 0.07114474929044466, "grad_norm": 6.2547407150268555, "learning_rate": 1.9993761692027007e-05, "loss": 1.485, "step": 94 }, { "epoch": 0.07190160832544938, "grad_norm": 6.645302772521973, "learning_rate": 1.9993474941502067e-05, "loss": 1.463, "step": 95 }, { "epoch": 0.07265846736045412, "grad_norm": 7.073038578033447, "learning_rate": 1.9993181749980168e-05, "loss": 1.509, "step": 96 }, { "epoch": 0.07341532639545885, "grad_norm": 6.401993274688721, "learning_rate": 1.99928821176503e-05, "loss": 1.4958, "step": 97 }, { "epoch": 0.07417218543046358, "grad_norm": 6.126581192016602, "learning_rate": 1.9992576044705596e-05, "loss": 1.4449, "step": 98 }, { "epoch": 0.0749290444654683, "grad_norm": 8.766273498535156, "learning_rate": 1.9992263531343348e-05, "loss": 1.5218, "step": 99 }, { "epoch": 0.07568590350047304, "grad_norm": 5.65410852432251, "learning_rate": 1.9991944577764996e-05, "loss": 1.5205, "step": 100 }, { "epoch": 0.07644276253547777, "grad_norm": 5.447603702545166, "learning_rate": 1.9991619184176136e-05, "loss": 1.4651, "step": 101 }, { "epoch": 0.0771996215704825, "grad_norm": 5.317190647125244, "learning_rate": 1.9991287350786512e-05, "loss": 1.5059, "step": 102 }, { "epoch": 0.07795648060548722, "grad_norm": 5.233520984649658, "learning_rate": 1.9990949077810015e-05, "loss": 1.4556, "step": 103 }, { "epoch": 0.07871333964049196, "grad_norm": 4.955499649047852, "learning_rate": 1.9990604365464693e-05, "loss": 1.4236, "step": 104 }, { "epoch": 0.07947019867549669, "grad_norm": 9.175353050231934, "learning_rate": 1.9990253213972742e-05, "loss": 1.4482, "step": 105 }, { "epoch": 0.08022705771050143, "grad_norm": 5.2216057777404785, "learning_rate": 1.998989562356051e-05, "loss": 1.4902, "step": 106 }, { "epoch": 0.08098391674550615, "grad_norm": 4.395474910736084, "learning_rate": 1.9989531594458487e-05, "loss": 1.4419, "step": 107 }, { "epoch": 0.08174077578051088, "grad_norm": 4.641335487365723, "learning_rate": 1.998916112690133e-05, "loss": 1.4715, "step": 108 }, { "epoch": 0.08249763481551561, "grad_norm": 5.315745830535889, "learning_rate": 1.9988784221127834e-05, "loss": 1.4742, "step": 109 }, { "epoch": 0.08325449385052035, "grad_norm": 5.404274940490723, "learning_rate": 1.998840087738095e-05, "loss": 1.4579, "step": 110 }, { "epoch": 0.08401135288552507, "grad_norm": 4.288702011108398, "learning_rate": 1.9988011095907768e-05, "loss": 1.49, "step": 111 }, { "epoch": 0.0847682119205298, "grad_norm": 4.434887409210205, "learning_rate": 1.9987614876959536e-05, "loss": 1.4946, "step": 112 }, { "epoch": 0.08552507095553454, "grad_norm": 5.428564071655273, "learning_rate": 1.9987212220791657e-05, "loss": 1.3817, "step": 113 }, { "epoch": 0.08628192999053927, "grad_norm": 3.9893720149993896, "learning_rate": 1.9986803127663672e-05, "loss": 1.4428, "step": 114 }, { "epoch": 0.08703878902554399, "grad_norm": 4.35543966293335, "learning_rate": 1.998638759783928e-05, "loss": 1.3801, "step": 115 }, { "epoch": 0.08779564806054872, "grad_norm": 4.2772722244262695, "learning_rate": 1.9985965631586318e-05, "loss": 1.3975, "step": 116 }, { "epoch": 0.08855250709555346, "grad_norm": 4.769036769866943, "learning_rate": 1.9985537229176787e-05, "loss": 1.4413, "step": 117 }, { "epoch": 0.08930936613055819, "grad_norm": 4.7659759521484375, "learning_rate": 1.9985102390886825e-05, "loss": 1.4665, "step": 118 }, { "epoch": 0.09006622516556291, "grad_norm": 5.218923091888428, "learning_rate": 1.9984661116996723e-05, "loss": 1.4544, "step": 119 }, { "epoch": 0.09082308420056764, "grad_norm": 4.296699047088623, "learning_rate": 1.9984213407790924e-05, "loss": 1.3944, "step": 120 }, { "epoch": 0.09157994323557238, "grad_norm": 3.866936683654785, "learning_rate": 1.9983759263558003e-05, "loss": 1.4273, "step": 121 }, { "epoch": 0.09233680227057711, "grad_norm": 4.711172103881836, "learning_rate": 1.99832986845907e-05, "loss": 1.4208, "step": 122 }, { "epoch": 0.09309366130558183, "grad_norm": 4.553902626037598, "learning_rate": 1.9982831671185905e-05, "loss": 1.525, "step": 123 }, { "epoch": 0.09385052034058657, "grad_norm": 4.0878801345825195, "learning_rate": 1.9982358223644635e-05, "loss": 1.4621, "step": 124 }, { "epoch": 0.0946073793755913, "grad_norm": 4.239192485809326, "learning_rate": 1.9981878342272074e-05, "loss": 1.4151, "step": 125 }, { "epoch": 0.09536423841059603, "grad_norm": 3.9742391109466553, "learning_rate": 1.9981392027377548e-05, "loss": 1.4588, "step": 126 }, { "epoch": 0.09612109744560075, "grad_norm": 4.459286212921143, "learning_rate": 1.9980899279274523e-05, "loss": 1.414, "step": 127 }, { "epoch": 0.09687795648060549, "grad_norm": 4.164027214050293, "learning_rate": 1.9980400098280622e-05, "loss": 1.4572, "step": 128 }, { "epoch": 0.09763481551561022, "grad_norm": 4.778876304626465, "learning_rate": 1.9979894484717604e-05, "loss": 1.4279, "step": 129 }, { "epoch": 0.09839167455061495, "grad_norm": 4.639044761657715, "learning_rate": 1.9979382438911383e-05, "loss": 1.4343, "step": 130 }, { "epoch": 0.09914853358561967, "grad_norm": 4.090446949005127, "learning_rate": 1.9978863961192018e-05, "loss": 1.4802, "step": 131 }, { "epoch": 0.09990539262062441, "grad_norm": 4.360771656036377, "learning_rate": 1.9978339051893702e-05, "loss": 1.4552, "step": 132 }, { "epoch": 0.10066225165562914, "grad_norm": 3.851464033126831, "learning_rate": 1.9977807711354796e-05, "loss": 1.3779, "step": 133 }, { "epoch": 0.10141911069063388, "grad_norm": 4.016122341156006, "learning_rate": 1.997726993991779e-05, "loss": 1.4313, "step": 134 }, { "epoch": 0.1021759697256386, "grad_norm": 4.009467124938965, "learning_rate": 1.997672573792932e-05, "loss": 1.491, "step": 135 }, { "epoch": 0.10293282876064333, "grad_norm": 3.8596322536468506, "learning_rate": 1.997617510574018e-05, "loss": 1.4724, "step": 136 }, { "epoch": 0.10368968779564806, "grad_norm": 3.6011574268341064, "learning_rate": 1.9975618043705282e-05, "loss": 1.3931, "step": 137 }, { "epoch": 0.1044465468306528, "grad_norm": 4.025736331939697, "learning_rate": 1.997505455218371e-05, "loss": 1.4269, "step": 138 }, { "epoch": 0.10520340586565752, "grad_norm": 3.760977268218994, "learning_rate": 1.9974484631538685e-05, "loss": 1.4311, "step": 139 }, { "epoch": 0.10596026490066225, "grad_norm": 4.554644584655762, "learning_rate": 1.9973908282137565e-05, "loss": 1.4535, "step": 140 }, { "epoch": 0.10671712393566699, "grad_norm": 4.12142276763916, "learning_rate": 1.9973325504351856e-05, "loss": 1.4111, "step": 141 }, { "epoch": 0.10747398297067172, "grad_norm": 3.9459025859832764, "learning_rate": 1.9972736298557207e-05, "loss": 1.4424, "step": 142 }, { "epoch": 0.10823084200567644, "grad_norm": 3.65413236618042, "learning_rate": 1.9972140665133412e-05, "loss": 1.3589, "step": 143 }, { "epoch": 0.10898770104068117, "grad_norm": 3.935250997543335, "learning_rate": 1.997153860446441e-05, "loss": 1.3985, "step": 144 }, { "epoch": 0.1097445600756859, "grad_norm": 4.394814968109131, "learning_rate": 1.9970930116938273e-05, "loss": 1.4304, "step": 145 }, { "epoch": 0.11050141911069064, "grad_norm": 3.6491141319274902, "learning_rate": 1.997031520294723e-05, "loss": 1.3928, "step": 146 }, { "epoch": 0.11125827814569536, "grad_norm": 4.235386848449707, "learning_rate": 1.9969693862887643e-05, "loss": 1.4712, "step": 147 }, { "epoch": 0.1120151371807001, "grad_norm": 4.189138412475586, "learning_rate": 1.996906609716002e-05, "loss": 1.3609, "step": 148 }, { "epoch": 0.11277199621570483, "grad_norm": 3.729450225830078, "learning_rate": 1.9968431906169005e-05, "loss": 1.4229, "step": 149 }, { "epoch": 0.11352885525070956, "grad_norm": 3.915863513946533, "learning_rate": 1.996779129032339e-05, "loss": 1.3628, "step": 150 }, { "epoch": 0.11428571428571428, "grad_norm": 4.461569786071777, "learning_rate": 1.9967144250036104e-05, "loss": 1.4087, "step": 151 }, { "epoch": 0.11504257332071902, "grad_norm": 4.412698745727539, "learning_rate": 1.9966490785724223e-05, "loss": 1.4392, "step": 152 }, { "epoch": 0.11579943235572375, "grad_norm": 4.236743450164795, "learning_rate": 1.9965830897808954e-05, "loss": 1.4391, "step": 153 }, { "epoch": 0.11655629139072848, "grad_norm": 4.672597408294678, "learning_rate": 1.996516458671566e-05, "loss": 1.3995, "step": 154 }, { "epoch": 0.1173131504257332, "grad_norm": 5.059709072113037, "learning_rate": 1.9964491852873833e-05, "loss": 1.3566, "step": 155 }, { "epoch": 0.11807000946073794, "grad_norm": 4.973750114440918, "learning_rate": 1.99638126967171e-05, "loss": 1.3993, "step": 156 }, { "epoch": 0.11882686849574267, "grad_norm": 4.362597942352295, "learning_rate": 1.996312711868324e-05, "loss": 1.4254, "step": 157 }, { "epoch": 0.1195837275307474, "grad_norm": 4.407685279846191, "learning_rate": 1.9962435119214164e-05, "loss": 1.3983, "step": 158 }, { "epoch": 0.12034058656575213, "grad_norm": 4.614277362823486, "learning_rate": 1.9961736698755928e-05, "loss": 1.412, "step": 159 }, { "epoch": 0.12109744560075686, "grad_norm": 4.18186092376709, "learning_rate": 1.9961031857758718e-05, "loss": 1.3653, "step": 160 }, { "epoch": 0.12185430463576159, "grad_norm": 4.011139392852783, "learning_rate": 1.9960320596676866e-05, "loss": 1.4234, "step": 161 }, { "epoch": 0.12261116367076633, "grad_norm": 4.428970813751221, "learning_rate": 1.9959602915968842e-05, "loss": 1.3899, "step": 162 }, { "epoch": 0.12336802270577105, "grad_norm": 4.968282222747803, "learning_rate": 1.995887881609725e-05, "loss": 1.4235, "step": 163 }, { "epoch": 0.12412488174077578, "grad_norm": 4.600246906280518, "learning_rate": 1.9958148297528833e-05, "loss": 1.3656, "step": 164 }, { "epoch": 0.12488174077578051, "grad_norm": 4.392306804656982, "learning_rate": 1.9957411360734476e-05, "loss": 1.3804, "step": 165 }, { "epoch": 0.12563859981078523, "grad_norm": 4.066370964050293, "learning_rate": 1.995666800618919e-05, "loss": 1.4013, "step": 166 }, { "epoch": 0.12639545884578998, "grad_norm": 3.9358901977539062, "learning_rate": 1.995591823437214e-05, "loss": 1.3887, "step": 167 }, { "epoch": 0.1271523178807947, "grad_norm": 4.182436466217041, "learning_rate": 1.9955162045766607e-05, "loss": 1.4011, "step": 168 }, { "epoch": 0.12790917691579942, "grad_norm": 3.8715391159057617, "learning_rate": 1.9954399440860026e-05, "loss": 1.3881, "step": 169 }, { "epoch": 0.12866603595080417, "grad_norm": 4.34489107131958, "learning_rate": 1.9953630420143958e-05, "loss": 1.3679, "step": 170 }, { "epoch": 0.1294228949858089, "grad_norm": 4.175931930541992, "learning_rate": 1.9952854984114097e-05, "loss": 1.3995, "step": 171 }, { "epoch": 0.13017975402081364, "grad_norm": 6.231164932250977, "learning_rate": 1.9952073133270288e-05, "loss": 1.3826, "step": 172 }, { "epoch": 0.13093661305581836, "grad_norm": 3.969299554824829, "learning_rate": 1.9951284868116495e-05, "loss": 1.427, "step": 173 }, { "epoch": 0.13169347209082308, "grad_norm": 5.056988716125488, "learning_rate": 1.9950490189160818e-05, "loss": 1.4377, "step": 174 }, { "epoch": 0.13245033112582782, "grad_norm": 3.9641916751861572, "learning_rate": 1.99496890969155e-05, "loss": 1.3729, "step": 175 }, { "epoch": 0.13320719016083254, "grad_norm": 4.119785308837891, "learning_rate": 1.9948881591896913e-05, "loss": 1.4061, "step": 176 }, { "epoch": 0.13396404919583726, "grad_norm": 4.154798984527588, "learning_rate": 1.9948067674625557e-05, "loss": 1.4383, "step": 177 }, { "epoch": 0.134720908230842, "grad_norm": 4.396413326263428, "learning_rate": 1.994724734562607e-05, "loss": 1.3806, "step": 178 }, { "epoch": 0.13547776726584673, "grad_norm": 10.802559852600098, "learning_rate": 1.9946420605427235e-05, "loss": 1.4279, "step": 179 }, { "epoch": 0.13623462630085148, "grad_norm": 4.602297782897949, "learning_rate": 1.9945587454561944e-05, "loss": 1.3618, "step": 180 }, { "epoch": 0.1369914853358562, "grad_norm": 4.874974727630615, "learning_rate": 1.994474789356724e-05, "loss": 1.3582, "step": 181 }, { "epoch": 0.13774834437086092, "grad_norm": 5.023828983306885, "learning_rate": 1.994390192298429e-05, "loss": 1.3445, "step": 182 }, { "epoch": 0.13850520340586567, "grad_norm": 4.938666343688965, "learning_rate": 1.994304954335839e-05, "loss": 1.4221, "step": 183 }, { "epoch": 0.1392620624408704, "grad_norm": 5.975377559661865, "learning_rate": 1.9942190755238973e-05, "loss": 1.3947, "step": 184 }, { "epoch": 0.1400189214758751, "grad_norm": 8.078311920166016, "learning_rate": 1.9941325559179608e-05, "loss": 1.3925, "step": 185 }, { "epoch": 0.14077578051087986, "grad_norm": 5.0124897956848145, "learning_rate": 1.9940453955737976e-05, "loss": 1.3958, "step": 186 }, { "epoch": 0.14153263954588458, "grad_norm": 4.94537353515625, "learning_rate": 1.9939575945475905e-05, "loss": 1.3855, "step": 187 }, { "epoch": 0.14228949858088932, "grad_norm": 5.828818321228027, "learning_rate": 1.9938691528959348e-05, "loss": 1.4567, "step": 188 }, { "epoch": 0.14304635761589404, "grad_norm": 4.672356605529785, "learning_rate": 1.993780070675838e-05, "loss": 1.3581, "step": 189 }, { "epoch": 0.14380321665089876, "grad_norm": 5.052429676055908, "learning_rate": 1.993690347944722e-05, "loss": 1.3874, "step": 190 }, { "epoch": 0.1445600756859035, "grad_norm": 4.454349040985107, "learning_rate": 1.9935999847604204e-05, "loss": 1.4282, "step": 191 }, { "epoch": 0.14531693472090823, "grad_norm": 4.81812858581543, "learning_rate": 1.9935089811811794e-05, "loss": 1.4103, "step": 192 }, { "epoch": 0.14607379375591295, "grad_norm": 3.8706412315368652, "learning_rate": 1.993417337265659e-05, "loss": 1.4024, "step": 193 }, { "epoch": 0.1468306527909177, "grad_norm": 3.948594093322754, "learning_rate": 1.9933250530729314e-05, "loss": 1.387, "step": 194 }, { "epoch": 0.14758751182592242, "grad_norm": 4.73719596862793, "learning_rate": 1.993232128662482e-05, "loss": 1.4528, "step": 195 }, { "epoch": 0.14834437086092717, "grad_norm": 3.9017584323883057, "learning_rate": 1.993138564094208e-05, "loss": 1.4245, "step": 196 }, { "epoch": 0.14910122989593189, "grad_norm": 6.6446309089660645, "learning_rate": 1.9930443594284193e-05, "loss": 1.4046, "step": 197 }, { "epoch": 0.1498580889309366, "grad_norm": 4.191623210906982, "learning_rate": 1.9929495147258395e-05, "loss": 1.3987, "step": 198 }, { "epoch": 0.15061494796594135, "grad_norm": 3.8362607955932617, "learning_rate": 1.992854030047604e-05, "loss": 1.3583, "step": 199 }, { "epoch": 0.15137180700094607, "grad_norm": 4.051894187927246, "learning_rate": 1.9927579054552603e-05, "loss": 1.3856, "step": 200 }, { "epoch": 0.1521286660359508, "grad_norm": 3.792412281036377, "learning_rate": 1.992661141010769e-05, "loss": 1.3961, "step": 201 }, { "epoch": 0.15288552507095554, "grad_norm": 3.697641134262085, "learning_rate": 1.992563736776503e-05, "loss": 1.3808, "step": 202 }, { "epoch": 0.15364238410596026, "grad_norm": 4.134721279144287, "learning_rate": 1.992465692815248e-05, "loss": 1.3594, "step": 203 }, { "epoch": 0.154399243140965, "grad_norm": 4.171304225921631, "learning_rate": 1.9923670091902013e-05, "loss": 1.4217, "step": 204 }, { "epoch": 0.15515610217596973, "grad_norm": 3.476039171218872, "learning_rate": 1.992267685964973e-05, "loss": 1.3967, "step": 205 }, { "epoch": 0.15591296121097445, "grad_norm": 3.4347240924835205, "learning_rate": 1.9921677232035846e-05, "loss": 1.3422, "step": 206 }, { "epoch": 0.1566698202459792, "grad_norm": 3.7200000286102295, "learning_rate": 1.992067120970472e-05, "loss": 1.3538, "step": 207 }, { "epoch": 0.15742667928098392, "grad_norm": 3.8184263706207275, "learning_rate": 1.9919658793304804e-05, "loss": 1.3956, "step": 208 }, { "epoch": 0.15818353831598864, "grad_norm": 3.761478900909424, "learning_rate": 1.9918639983488694e-05, "loss": 1.4233, "step": 209 }, { "epoch": 0.15894039735099338, "grad_norm": 3.587502956390381, "learning_rate": 1.99176147809131e-05, "loss": 1.3514, "step": 210 }, { "epoch": 0.1596972563859981, "grad_norm": 3.3828699588775635, "learning_rate": 1.9916583186238847e-05, "loss": 1.3766, "step": 211 }, { "epoch": 0.16045411542100285, "grad_norm": 3.2444939613342285, "learning_rate": 1.9915545200130893e-05, "loss": 1.4051, "step": 212 }, { "epoch": 0.16121097445600757, "grad_norm": 3.4360880851745605, "learning_rate": 1.9914500823258298e-05, "loss": 1.3364, "step": 213 }, { "epoch": 0.1619678334910123, "grad_norm": 3.3002805709838867, "learning_rate": 1.9913450056294255e-05, "loss": 1.3807, "step": 214 }, { "epoch": 0.16272469252601704, "grad_norm": 3.551203489303589, "learning_rate": 1.991239289991608e-05, "loss": 1.4077, "step": 215 }, { "epoch": 0.16348155156102176, "grad_norm": 2.9857335090637207, "learning_rate": 1.991132935480519e-05, "loss": 1.3667, "step": 216 }, { "epoch": 0.16423841059602648, "grad_norm": 3.935084342956543, "learning_rate": 1.9910259421647136e-05, "loss": 1.3973, "step": 217 }, { "epoch": 0.16499526963103123, "grad_norm": 3.209479570388794, "learning_rate": 1.9909183101131576e-05, "loss": 1.3752, "step": 218 }, { "epoch": 0.16575212866603595, "grad_norm": 3.311500072479248, "learning_rate": 1.9908100393952293e-05, "loss": 1.3566, "step": 219 }, { "epoch": 0.1665089877010407, "grad_norm": 3.0751259326934814, "learning_rate": 1.990701130080718e-05, "loss": 1.411, "step": 220 }, { "epoch": 0.16726584673604541, "grad_norm": 3.3133180141448975, "learning_rate": 1.9905915822398257e-05, "loss": 1.4006, "step": 221 }, { "epoch": 0.16802270577105013, "grad_norm": 3.2017252445220947, "learning_rate": 1.9904813959431646e-05, "loss": 1.4028, "step": 222 }, { "epoch": 0.16877956480605488, "grad_norm": 3.404691219329834, "learning_rate": 1.9903705712617595e-05, "loss": 1.355, "step": 223 }, { "epoch": 0.1695364238410596, "grad_norm": 3.1049623489379883, "learning_rate": 1.990259108267046e-05, "loss": 1.3305, "step": 224 }, { "epoch": 0.17029328287606432, "grad_norm": 3.3933444023132324, "learning_rate": 1.990147007030871e-05, "loss": 1.3718, "step": 225 }, { "epoch": 0.17105014191106907, "grad_norm": 3.479591131210327, "learning_rate": 1.9900342676254945e-05, "loss": 1.393, "step": 226 }, { "epoch": 0.1718070009460738, "grad_norm": 3.3810219764709473, "learning_rate": 1.989920890123586e-05, "loss": 1.3864, "step": 227 }, { "epoch": 0.17256385998107854, "grad_norm": 3.4179928302764893, "learning_rate": 1.9898068745982263e-05, "loss": 1.3322, "step": 228 }, { "epoch": 0.17332071901608326, "grad_norm": 3.288922071456909, "learning_rate": 1.9896922211229088e-05, "loss": 1.3738, "step": 229 }, { "epoch": 0.17407757805108798, "grad_norm": 3.4045164585113525, "learning_rate": 1.9895769297715373e-05, "loss": 1.3509, "step": 230 }, { "epoch": 0.17483443708609273, "grad_norm": 3.384779453277588, "learning_rate": 1.9894610006184264e-05, "loss": 1.3596, "step": 231 }, { "epoch": 0.17559129612109745, "grad_norm": 3.6631815433502197, "learning_rate": 1.989344433738303e-05, "loss": 1.4126, "step": 232 }, { "epoch": 0.17634815515610217, "grad_norm": 3.1958444118499756, "learning_rate": 1.9892272292063034e-05, "loss": 1.3711, "step": 233 }, { "epoch": 0.1771050141911069, "grad_norm": 3.4087891578674316, "learning_rate": 1.989109387097977e-05, "loss": 1.3604, "step": 234 }, { "epoch": 0.17786187322611163, "grad_norm": 3.5950968265533447, "learning_rate": 1.988990907489282e-05, "loss": 1.3464, "step": 235 }, { "epoch": 0.17861873226111638, "grad_norm": 3.5223278999328613, "learning_rate": 1.988871790456589e-05, "loss": 1.3965, "step": 236 }, { "epoch": 0.1793755912961211, "grad_norm": 3.259669780731201, "learning_rate": 1.988752036076679e-05, "loss": 1.3915, "step": 237 }, { "epoch": 0.18013245033112582, "grad_norm": 3.1698622703552246, "learning_rate": 1.9886316444267436e-05, "loss": 1.3674, "step": 238 }, { "epoch": 0.18088930936613057, "grad_norm": 3.336416482925415, "learning_rate": 1.9885106155843857e-05, "loss": 1.3476, "step": 239 }, { "epoch": 0.1816461684011353, "grad_norm": 3.2986626625061035, "learning_rate": 1.9883889496276188e-05, "loss": 1.3139, "step": 240 }, { "epoch": 0.18240302743614, "grad_norm": 3.2197721004486084, "learning_rate": 1.9882666466348665e-05, "loss": 1.3611, "step": 241 }, { "epoch": 0.18315988647114476, "grad_norm": 3.120088815689087, "learning_rate": 1.988143706684964e-05, "loss": 1.364, "step": 242 }, { "epoch": 0.18391674550614948, "grad_norm": 2.9464049339294434, "learning_rate": 1.9880201298571558e-05, "loss": 1.3295, "step": 243 }, { "epoch": 0.18467360454115422, "grad_norm": 3.3369717597961426, "learning_rate": 1.9878959162310983e-05, "loss": 1.3669, "step": 244 }, { "epoch": 0.18543046357615894, "grad_norm": 2.900787353515625, "learning_rate": 1.987771065886857e-05, "loss": 1.3565, "step": 245 }, { "epoch": 0.18618732261116366, "grad_norm": 3.0211544036865234, "learning_rate": 1.9876455789049096e-05, "loss": 1.3882, "step": 246 }, { "epoch": 0.1869441816461684, "grad_norm": 3.1576292514801025, "learning_rate": 1.9875194553661415e-05, "loss": 1.3075, "step": 247 }, { "epoch": 0.18770104068117313, "grad_norm": 2.8850550651550293, "learning_rate": 1.9873926953518515e-05, "loss": 1.3665, "step": 248 }, { "epoch": 0.18845789971617785, "grad_norm": 3.188582420349121, "learning_rate": 1.9872652989437467e-05, "loss": 1.3555, "step": 249 }, { "epoch": 0.1892147587511826, "grad_norm": 4.313934803009033, "learning_rate": 1.9871372662239446e-05, "loss": 1.2937, "step": 250 }, { "epoch": 0.18997161778618732, "grad_norm": 3.0744991302490234, "learning_rate": 1.9870085972749733e-05, "loss": 1.3289, "step": 251 }, { "epoch": 0.19072847682119207, "grad_norm": 2.9217262268066406, "learning_rate": 1.986879292179771e-05, "loss": 1.33, "step": 252 }, { "epoch": 0.1914853358561968, "grad_norm": 3.089919328689575, "learning_rate": 1.986749351021686e-05, "loss": 1.3532, "step": 253 }, { "epoch": 0.1922421948912015, "grad_norm": 3.5609021186828613, "learning_rate": 1.9866187738844753e-05, "loss": 1.4002, "step": 254 }, { "epoch": 0.19299905392620625, "grad_norm": 3.0856025218963623, "learning_rate": 1.986487560852308e-05, "loss": 1.3691, "step": 255 }, { "epoch": 0.19375591296121097, "grad_norm": 2.679279327392578, "learning_rate": 1.986355712009762e-05, "loss": 1.3412, "step": 256 }, { "epoch": 0.1945127719962157, "grad_norm": 3.1083905696868896, "learning_rate": 1.9862232274418246e-05, "loss": 1.3023, "step": 257 }, { "epoch": 0.19526963103122044, "grad_norm": 2.726358413696289, "learning_rate": 1.9860901072338936e-05, "loss": 1.377, "step": 258 }, { "epoch": 0.19602649006622516, "grad_norm": 2.966639995574951, "learning_rate": 1.985956351471776e-05, "loss": 1.304, "step": 259 }, { "epoch": 0.1967833491012299, "grad_norm": 2.8776400089263916, "learning_rate": 1.9858219602416887e-05, "loss": 1.3481, "step": 260 }, { "epoch": 0.19754020813623463, "grad_norm": 3.0099427700042725, "learning_rate": 1.9856869336302588e-05, "loss": 1.4332, "step": 261 }, { "epoch": 0.19829706717123935, "grad_norm": 3.146959066390991, "learning_rate": 1.985551271724522e-05, "loss": 1.3372, "step": 262 }, { "epoch": 0.1990539262062441, "grad_norm": 3.076327323913574, "learning_rate": 1.9854149746119232e-05, "loss": 1.3258, "step": 263 }, { "epoch": 0.19981078524124882, "grad_norm": 2.8409347534179688, "learning_rate": 1.9852780423803187e-05, "loss": 1.2975, "step": 264 }, { "epoch": 0.20056764427625354, "grad_norm": 3.1386849880218506, "learning_rate": 1.9851404751179723e-05, "loss": 1.3395, "step": 265 }, { "epoch": 0.20132450331125828, "grad_norm": 3.104682445526123, "learning_rate": 1.9850022729135578e-05, "loss": 1.3667, "step": 266 }, { "epoch": 0.202081362346263, "grad_norm": 3.337529182434082, "learning_rate": 1.9848634358561584e-05, "loss": 1.3145, "step": 267 }, { "epoch": 0.20283822138126775, "grad_norm": 3.380446195602417, "learning_rate": 1.984723964035266e-05, "loss": 1.364, "step": 268 }, { "epoch": 0.20359508041627247, "grad_norm": 3.161867141723633, "learning_rate": 1.9845838575407824e-05, "loss": 1.3333, "step": 269 }, { "epoch": 0.2043519394512772, "grad_norm": 3.323434352874756, "learning_rate": 1.9844431164630178e-05, "loss": 1.3897, "step": 270 }, { "epoch": 0.20510879848628194, "grad_norm": 3.4208099842071533, "learning_rate": 1.984301740892692e-05, "loss": 1.333, "step": 271 }, { "epoch": 0.20586565752128666, "grad_norm": 3.178248643875122, "learning_rate": 1.984159730920933e-05, "loss": 1.3033, "step": 272 }, { "epoch": 0.20662251655629138, "grad_norm": 3.0145297050476074, "learning_rate": 1.9840170866392795e-05, "loss": 1.3055, "step": 273 }, { "epoch": 0.20737937559129613, "grad_norm": 3.6076059341430664, "learning_rate": 1.9838738081396764e-05, "loss": 1.3442, "step": 274 }, { "epoch": 0.20813623462630085, "grad_norm": 3.3622937202453613, "learning_rate": 1.9837298955144796e-05, "loss": 1.3666, "step": 275 }, { "epoch": 0.2088930936613056, "grad_norm": 3.782317876815796, "learning_rate": 1.9835853488564527e-05, "loss": 1.3791, "step": 276 }, { "epoch": 0.20964995269631032, "grad_norm": 3.1874301433563232, "learning_rate": 1.9834401682587688e-05, "loss": 1.3703, "step": 277 }, { "epoch": 0.21040681173131504, "grad_norm": 3.0065550804138184, "learning_rate": 1.9832943538150083e-05, "loss": 1.331, "step": 278 }, { "epoch": 0.21116367076631978, "grad_norm": 3.953733444213867, "learning_rate": 1.9831479056191618e-05, "loss": 1.3855, "step": 279 }, { "epoch": 0.2119205298013245, "grad_norm": 3.682438611984253, "learning_rate": 1.983000823765627e-05, "loss": 1.3605, "step": 280 }, { "epoch": 0.21267738883632922, "grad_norm": 3.57037615776062, "learning_rate": 1.9828531083492102e-05, "loss": 1.3048, "step": 281 }, { "epoch": 0.21343424787133397, "grad_norm": 3.4117233753204346, "learning_rate": 1.9827047594651275e-05, "loss": 1.3606, "step": 282 }, { "epoch": 0.2141911069063387, "grad_norm": 2.914785623550415, "learning_rate": 1.982555777209002e-05, "loss": 1.3596, "step": 283 }, { "epoch": 0.21494796594134344, "grad_norm": 3.271235942840576, "learning_rate": 1.9824061616768652e-05, "loss": 1.3208, "step": 284 }, { "epoch": 0.21570482497634816, "grad_norm": 3.3142642974853516, "learning_rate": 1.982255912965157e-05, "loss": 1.3574, "step": 285 }, { "epoch": 0.21646168401135288, "grad_norm": 3.752458095550537, "learning_rate": 1.9821050311707253e-05, "loss": 1.3818, "step": 286 }, { "epoch": 0.21721854304635763, "grad_norm": 3.1010730266571045, "learning_rate": 1.9819535163908266e-05, "loss": 1.2799, "step": 287 }, { "epoch": 0.21797540208136235, "grad_norm": 3.3089754581451416, "learning_rate": 1.9818013687231252e-05, "loss": 1.3719, "step": 288 }, { "epoch": 0.21873226111636707, "grad_norm": 3.800584316253662, "learning_rate": 1.9816485882656925e-05, "loss": 1.3458, "step": 289 }, { "epoch": 0.2194891201513718, "grad_norm": 3.5390021800994873, "learning_rate": 1.9814951751170087e-05, "loss": 1.3558, "step": 290 }, { "epoch": 0.22024597918637653, "grad_norm": 3.37929630279541, "learning_rate": 1.9813411293759618e-05, "loss": 1.3236, "step": 291 }, { "epoch": 0.22100283822138128, "grad_norm": 3.255699872970581, "learning_rate": 1.9811864511418467e-05, "loss": 1.3245, "step": 292 }, { "epoch": 0.221759697256386, "grad_norm": 3.8194658756256104, "learning_rate": 1.981031140514367e-05, "loss": 1.3381, "step": 293 }, { "epoch": 0.22251655629139072, "grad_norm": 3.8124804496765137, "learning_rate": 1.9808751975936344e-05, "loss": 1.3006, "step": 294 }, { "epoch": 0.22327341532639547, "grad_norm": 3.637120246887207, "learning_rate": 1.980718622480166e-05, "loss": 1.3411, "step": 295 }, { "epoch": 0.2240302743614002, "grad_norm": 3.8235883712768555, "learning_rate": 1.9805614152748887e-05, "loss": 1.3285, "step": 296 }, { "epoch": 0.2247871333964049, "grad_norm": 3.892608642578125, "learning_rate": 1.980403576079135e-05, "loss": 1.3015, "step": 297 }, { "epoch": 0.22554399243140966, "grad_norm": 3.9942359924316406, "learning_rate": 1.9802451049946468e-05, "loss": 1.3404, "step": 298 }, { "epoch": 0.22630085146641438, "grad_norm": 3.8982861042022705, "learning_rate": 1.9800860021235708e-05, "loss": 1.3194, "step": 299 }, { "epoch": 0.22705771050141912, "grad_norm": 4.402480125427246, "learning_rate": 1.979926267568463e-05, "loss": 1.3383, "step": 300 }, { "epoch": 0.22781456953642384, "grad_norm": 3.9718708992004395, "learning_rate": 1.979765901432286e-05, "loss": 1.301, "step": 301 }, { "epoch": 0.22857142857142856, "grad_norm": 3.8678481578826904, "learning_rate": 1.979604903818409e-05, "loss": 1.3269, "step": 302 }, { "epoch": 0.2293282876064333, "grad_norm": 3.352957010269165, "learning_rate": 1.979443274830609e-05, "loss": 1.3332, "step": 303 }, { "epoch": 0.23008514664143803, "grad_norm": 3.937535524368286, "learning_rate": 1.9792810145730696e-05, "loss": 1.3464, "step": 304 }, { "epoch": 0.23084200567644275, "grad_norm": 4.081162929534912, "learning_rate": 1.9791181231503804e-05, "loss": 1.327, "step": 305 }, { "epoch": 0.2315988647114475, "grad_norm": 3.5600180625915527, "learning_rate": 1.97895460066754e-05, "loss": 1.3536, "step": 306 }, { "epoch": 0.23235572374645222, "grad_norm": 3.9321706295013428, "learning_rate": 1.9787904472299512e-05, "loss": 1.2999, "step": 307 }, { "epoch": 0.23311258278145697, "grad_norm": 4.384609699249268, "learning_rate": 1.978625662943426e-05, "loss": 1.3461, "step": 308 }, { "epoch": 0.2338694418164617, "grad_norm": 4.421790599822998, "learning_rate": 1.978460247914181e-05, "loss": 1.3324, "step": 309 }, { "epoch": 0.2346263008514664, "grad_norm": 4.101651191711426, "learning_rate": 1.9782942022488404e-05, "loss": 1.2738, "step": 310 }, { "epoch": 0.23538315988647115, "grad_norm": 4.219285488128662, "learning_rate": 1.978127526054435e-05, "loss": 1.3519, "step": 311 }, { "epoch": 0.23614001892147587, "grad_norm": 3.5981838703155518, "learning_rate": 1.9779602194384014e-05, "loss": 1.3546, "step": 312 }, { "epoch": 0.2368968779564806, "grad_norm": 3.758359432220459, "learning_rate": 1.9777922825085835e-05, "loss": 1.3264, "step": 313 }, { "epoch": 0.23765373699148534, "grad_norm": 3.7645103931427, "learning_rate": 1.97762371537323e-05, "loss": 1.3135, "step": 314 }, { "epoch": 0.23841059602649006, "grad_norm": 3.3905699253082275, "learning_rate": 1.9774545181409973e-05, "loss": 1.2848, "step": 315 }, { "epoch": 0.2391674550614948, "grad_norm": 3.6732635498046875, "learning_rate": 1.9772846909209473e-05, "loss": 1.3487, "step": 316 }, { "epoch": 0.23992431409649953, "grad_norm": 3.8122737407684326, "learning_rate": 1.9771142338225476e-05, "loss": 1.333, "step": 317 }, { "epoch": 0.24068117313150425, "grad_norm": 4.025964260101318, "learning_rate": 1.9769431469556728e-05, "loss": 1.3431, "step": 318 }, { "epoch": 0.241438032166509, "grad_norm": 3.054323196411133, "learning_rate": 1.9767714304306024e-05, "loss": 1.3279, "step": 319 }, { "epoch": 0.24219489120151372, "grad_norm": 4.698709964752197, "learning_rate": 1.9765990843580227e-05, "loss": 1.3209, "step": 320 }, { "epoch": 0.24295175023651844, "grad_norm": 3.341327428817749, "learning_rate": 1.976426108849025e-05, "loss": 1.3424, "step": 321 }, { "epoch": 0.24370860927152319, "grad_norm": 3.9361190795898438, "learning_rate": 1.9762525040151074e-05, "loss": 1.3083, "step": 322 }, { "epoch": 0.2444654683065279, "grad_norm": 3.340085506439209, "learning_rate": 1.9760782699681716e-05, "loss": 1.3358, "step": 323 }, { "epoch": 0.24522232734153265, "grad_norm": 3.044618606567383, "learning_rate": 1.9759034068205273e-05, "loss": 1.3099, "step": 324 }, { "epoch": 0.24597918637653737, "grad_norm": 3.619760274887085, "learning_rate": 1.9757279146848883e-05, "loss": 1.3455, "step": 325 }, { "epoch": 0.2467360454115421, "grad_norm": 3.7121100425720215, "learning_rate": 1.975551793674374e-05, "loss": 1.3106, "step": 326 }, { "epoch": 0.24749290444654684, "grad_norm": 3.5931692123413086, "learning_rate": 1.9753750439025095e-05, "loss": 1.2905, "step": 327 }, { "epoch": 0.24824976348155156, "grad_norm": 3.603030204772949, "learning_rate": 1.975197665483225e-05, "loss": 1.3319, "step": 328 }, { "epoch": 0.24900662251655628, "grad_norm": 3.6277918815612793, "learning_rate": 1.9750196585308564e-05, "loss": 1.3393, "step": 329 }, { "epoch": 0.24976348155156103, "grad_norm": 3.5887362957000732, "learning_rate": 1.974841023160143e-05, "loss": 1.3866, "step": 330 }, { "epoch": 0.25052034058656575, "grad_norm": 3.4283299446105957, "learning_rate": 1.974661759486232e-05, "loss": 1.329, "step": 331 }, { "epoch": 0.25127719962157047, "grad_norm": 3.7355992794036865, "learning_rate": 1.9744818676246724e-05, "loss": 1.3129, "step": 332 }, { "epoch": 0.2520340586565752, "grad_norm": 3.726663589477539, "learning_rate": 1.974301347691421e-05, "loss": 1.3665, "step": 333 }, { "epoch": 0.25279091769157996, "grad_norm": 3.93129825592041, "learning_rate": 1.9741201998028377e-05, "loss": 1.3876, "step": 334 }, { "epoch": 0.2535477767265847, "grad_norm": 3.588931083679199, "learning_rate": 1.9739384240756873e-05, "loss": 1.3715, "step": 335 }, { "epoch": 0.2543046357615894, "grad_norm": 3.4406232833862305, "learning_rate": 1.9737560206271404e-05, "loss": 1.3013, "step": 336 }, { "epoch": 0.2550614947965941, "grad_norm": 3.481201171875, "learning_rate": 1.9735729895747714e-05, "loss": 1.3625, "step": 337 }, { "epoch": 0.25581835383159884, "grad_norm": 3.7452211380004883, "learning_rate": 1.973389331036559e-05, "loss": 1.3452, "step": 338 }, { "epoch": 0.2565752128666036, "grad_norm": 3.8469581604003906, "learning_rate": 1.973205045130887e-05, "loss": 1.3824, "step": 339 }, { "epoch": 0.25733207190160834, "grad_norm": 3.252890110015869, "learning_rate": 1.9730201319765423e-05, "loss": 1.311, "step": 340 }, { "epoch": 0.25808893093661306, "grad_norm": 3.9583048820495605, "learning_rate": 1.9728345916927187e-05, "loss": 1.3244, "step": 341 }, { "epoch": 0.2588457899716178, "grad_norm": 3.6613519191741943, "learning_rate": 1.9726484243990115e-05, "loss": 1.3539, "step": 342 }, { "epoch": 0.2596026490066225, "grad_norm": 3.4180917739868164, "learning_rate": 1.9724616302154218e-05, "loss": 1.3353, "step": 343 }, { "epoch": 0.2603595080416273, "grad_norm": 3.7470951080322266, "learning_rate": 1.9722742092623536e-05, "loss": 1.2864, "step": 344 }, { "epoch": 0.261116367076632, "grad_norm": 4.141618251800537, "learning_rate": 1.9720861616606165e-05, "loss": 1.3486, "step": 345 }, { "epoch": 0.2618732261116367, "grad_norm": 3.7161524295806885, "learning_rate": 1.9718974875314226e-05, "loss": 1.339, "step": 346 }, { "epoch": 0.26263008514664143, "grad_norm": 4.011509895324707, "learning_rate": 1.9717081869963887e-05, "loss": 1.4027, "step": 347 }, { "epoch": 0.26338694418164615, "grad_norm": 4.976902008056641, "learning_rate": 1.9715182601775348e-05, "loss": 1.3078, "step": 348 }, { "epoch": 0.2641438032166509, "grad_norm": 3.8435733318328857, "learning_rate": 1.9713277071972844e-05, "loss": 1.3013, "step": 349 }, { "epoch": 0.26490066225165565, "grad_norm": 3.3969762325286865, "learning_rate": 1.971136528178466e-05, "loss": 1.3078, "step": 350 }, { "epoch": 0.26565752128666037, "grad_norm": 4.123608112335205, "learning_rate": 1.9709447232443096e-05, "loss": 1.3476, "step": 351 }, { "epoch": 0.2664143803216651, "grad_norm": 3.974820137023926, "learning_rate": 1.9707522925184507e-05, "loss": 1.377, "step": 352 }, { "epoch": 0.2671712393566698, "grad_norm": 4.08565616607666, "learning_rate": 1.9705592361249267e-05, "loss": 1.3559, "step": 353 }, { "epoch": 0.26792809839167453, "grad_norm": 3.7338943481445312, "learning_rate": 1.970365554188179e-05, "loss": 1.2845, "step": 354 }, { "epoch": 0.2686849574266793, "grad_norm": 3.806567430496216, "learning_rate": 1.9701712468330518e-05, "loss": 1.4283, "step": 355 }, { "epoch": 0.269441816461684, "grad_norm": 3.4662294387817383, "learning_rate": 1.9699763141847928e-05, "loss": 1.3068, "step": 356 }, { "epoch": 0.27019867549668874, "grad_norm": 3.5118749141693115, "learning_rate": 1.9697807563690522e-05, "loss": 1.266, "step": 357 }, { "epoch": 0.27095553453169346, "grad_norm": 4.166219711303711, "learning_rate": 1.969584573511885e-05, "loss": 1.3355, "step": 358 }, { "epoch": 0.2717123935666982, "grad_norm": 3.828523635864258, "learning_rate": 1.969387765739746e-05, "loss": 1.2712, "step": 359 }, { "epoch": 0.27246925260170296, "grad_norm": 3.8785219192504883, "learning_rate": 1.969190333179495e-05, "loss": 1.2761, "step": 360 }, { "epoch": 0.2732261116367077, "grad_norm": 3.772268056869507, "learning_rate": 1.9689922759583947e-05, "loss": 1.372, "step": 361 }, { "epoch": 0.2739829706717124, "grad_norm": 3.7379493713378906, "learning_rate": 1.968793594204109e-05, "loss": 1.2843, "step": 362 }, { "epoch": 0.2747398297067171, "grad_norm": 4.294455051422119, "learning_rate": 1.9685942880447054e-05, "loss": 1.3069, "step": 363 }, { "epoch": 0.27549668874172184, "grad_norm": 4.1428728103637695, "learning_rate": 1.9683943576086536e-05, "loss": 1.366, "step": 364 }, { "epoch": 0.27625354777672656, "grad_norm": 3.9030814170837402, "learning_rate": 1.9681938030248257e-05, "loss": 1.342, "step": 365 }, { "epoch": 0.27701040681173134, "grad_norm": 4.4898681640625, "learning_rate": 1.967992624422496e-05, "loss": 1.2735, "step": 366 }, { "epoch": 0.27776726584673606, "grad_norm": 4.548799514770508, "learning_rate": 1.9677908219313414e-05, "loss": 1.3589, "step": 367 }, { "epoch": 0.2785241248817408, "grad_norm": 4.4808478355407715, "learning_rate": 1.9675883956814403e-05, "loss": 1.373, "step": 368 }, { "epoch": 0.2792809839167455, "grad_norm": 4.146103858947754, "learning_rate": 1.967385345803274e-05, "loss": 1.2748, "step": 369 }, { "epoch": 0.2800378429517502, "grad_norm": 5.006552696228027, "learning_rate": 1.9671816724277254e-05, "loss": 1.2852, "step": 370 }, { "epoch": 0.280794701986755, "grad_norm": 4.279321670532227, "learning_rate": 1.966977375686079e-05, "loss": 1.3634, "step": 371 }, { "epoch": 0.2815515610217597, "grad_norm": 5.318479537963867, "learning_rate": 1.9667724557100214e-05, "loss": 1.3184, "step": 372 }, { "epoch": 0.28230842005676443, "grad_norm": 4.354931354522705, "learning_rate": 1.966566912631641e-05, "loss": 1.3018, "step": 373 }, { "epoch": 0.28306527909176915, "grad_norm": 3.5126800537109375, "learning_rate": 1.9663607465834275e-05, "loss": 1.2811, "step": 374 }, { "epoch": 0.28382213812677387, "grad_norm": 4.875300407409668, "learning_rate": 1.9661539576982728e-05, "loss": 1.3238, "step": 375 }, { "epoch": 0.28457899716177865, "grad_norm": 4.699173450469971, "learning_rate": 1.9659465461094692e-05, "loss": 1.3223, "step": 376 }, { "epoch": 0.28533585619678337, "grad_norm": 3.6528842449188232, "learning_rate": 1.9657385119507118e-05, "loss": 1.292, "step": 377 }, { "epoch": 0.2860927152317881, "grad_norm": 3.849123239517212, "learning_rate": 1.965529855356096e-05, "loss": 1.3114, "step": 378 }, { "epoch": 0.2868495742667928, "grad_norm": 3.7049927711486816, "learning_rate": 1.9653205764601182e-05, "loss": 1.3314, "step": 379 }, { "epoch": 0.2876064333017975, "grad_norm": 4.335115909576416, "learning_rate": 1.9651106753976768e-05, "loss": 1.3719, "step": 380 }, { "epoch": 0.28836329233680225, "grad_norm": 4.870954990386963, "learning_rate": 1.964900152304071e-05, "loss": 1.3264, "step": 381 }, { "epoch": 0.289120151371807, "grad_norm": 4.583834648132324, "learning_rate": 1.9646890073150005e-05, "loss": 1.3743, "step": 382 }, { "epoch": 0.28987701040681174, "grad_norm": 3.795956611633301, "learning_rate": 1.964477240566566e-05, "loss": 1.2997, "step": 383 }, { "epoch": 0.29063386944181646, "grad_norm": 5.41873025894165, "learning_rate": 1.9642648521952695e-05, "loss": 1.3381, "step": 384 }, { "epoch": 0.2913907284768212, "grad_norm": 4.2772393226623535, "learning_rate": 1.9640518423380127e-05, "loss": 1.3322, "step": 385 }, { "epoch": 0.2921475875118259, "grad_norm": 10.241232872009277, "learning_rate": 1.9638382111320996e-05, "loss": 1.3249, "step": 386 }, { "epoch": 0.2929044465468307, "grad_norm": 3.4204752445220947, "learning_rate": 1.9636239587152323e-05, "loss": 1.3295, "step": 387 }, { "epoch": 0.2936613055818354, "grad_norm": 3.368516683578491, "learning_rate": 1.9634090852255154e-05, "loss": 1.3561, "step": 388 }, { "epoch": 0.2944181646168401, "grad_norm": 3.5226809978485107, "learning_rate": 1.9631935908014532e-05, "loss": 1.3146, "step": 389 }, { "epoch": 0.29517502365184484, "grad_norm": 3.446794271469116, "learning_rate": 1.9629774755819495e-05, "loss": 1.2973, "step": 390 }, { "epoch": 0.29593188268684956, "grad_norm": 3.176982879638672, "learning_rate": 1.9627607397063097e-05, "loss": 1.3233, "step": 391 }, { "epoch": 0.29668874172185433, "grad_norm": 3.0656180381774902, "learning_rate": 1.9625433833142376e-05, "loss": 1.3246, "step": 392 }, { "epoch": 0.29744560075685905, "grad_norm": 3.4723055362701416, "learning_rate": 1.9623254065458387e-05, "loss": 1.3461, "step": 393 }, { "epoch": 0.29820245979186377, "grad_norm": 3.2769827842712402, "learning_rate": 1.962106809541616e-05, "loss": 1.3242, "step": 394 }, { "epoch": 0.2989593188268685, "grad_norm": 2.8769099712371826, "learning_rate": 1.9618875924424756e-05, "loss": 1.2548, "step": 395 }, { "epoch": 0.2997161778618732, "grad_norm": 3.3671765327453613, "learning_rate": 1.9616677553897204e-05, "loss": 1.3241, "step": 396 }, { "epoch": 0.30047303689687793, "grad_norm": 3.104637384414673, "learning_rate": 1.9614472985250547e-05, "loss": 1.3121, "step": 397 }, { "epoch": 0.3012298959318827, "grad_norm": 3.6635613441467285, "learning_rate": 1.9612262219905807e-05, "loss": 1.3157, "step": 398 }, { "epoch": 0.3019867549668874, "grad_norm": 3.4978229999542236, "learning_rate": 1.9610045259288017e-05, "loss": 1.3566, "step": 399 }, { "epoch": 0.30274361400189215, "grad_norm": 3.084291458129883, "learning_rate": 1.9607822104826198e-05, "loss": 1.3157, "step": 400 }, { "epoch": 0.30350047303689687, "grad_norm": 2.742034673690796, "learning_rate": 1.9605592757953354e-05, "loss": 1.2778, "step": 401 }, { "epoch": 0.3042573320719016, "grad_norm": 2.89613938331604, "learning_rate": 1.960335722010649e-05, "loss": 1.3467, "step": 402 }, { "epoch": 0.30501419110690636, "grad_norm": 3.3919119834899902, "learning_rate": 1.9601115492726603e-05, "loss": 1.3264, "step": 403 }, { "epoch": 0.3057710501419111, "grad_norm": 2.5944290161132812, "learning_rate": 1.9598867577258672e-05, "loss": 1.3765, "step": 404 }, { "epoch": 0.3065279091769158, "grad_norm": 2.6674866676330566, "learning_rate": 1.9596613475151674e-05, "loss": 1.3077, "step": 405 }, { "epoch": 0.3072847682119205, "grad_norm": 2.9125916957855225, "learning_rate": 1.9594353187858567e-05, "loss": 1.3472, "step": 406 }, { "epoch": 0.30804162724692524, "grad_norm": 2.746316909790039, "learning_rate": 1.9592086716836292e-05, "loss": 1.3137, "step": 407 }, { "epoch": 0.30879848628193, "grad_norm": 3.5916221141815186, "learning_rate": 1.958981406354579e-05, "loss": 1.3181, "step": 408 }, { "epoch": 0.30955534531693474, "grad_norm": 2.9677999019622803, "learning_rate": 1.9587535229451973e-05, "loss": 1.3094, "step": 409 }, { "epoch": 0.31031220435193946, "grad_norm": 2.766179084777832, "learning_rate": 1.9585250216023746e-05, "loss": 1.3205, "step": 410 }, { "epoch": 0.3110690633869442, "grad_norm": 3.019426107406616, "learning_rate": 1.9582959024733992e-05, "loss": 1.3053, "step": 411 }, { "epoch": 0.3118259224219489, "grad_norm": 3.580401659011841, "learning_rate": 1.9580661657059582e-05, "loss": 1.3685, "step": 412 }, { "epoch": 0.3125827814569536, "grad_norm": 3.2559759616851807, "learning_rate": 1.957835811448136e-05, "loss": 1.2975, "step": 413 }, { "epoch": 0.3133396404919584, "grad_norm": 3.185425281524658, "learning_rate": 1.957604839848415e-05, "loss": 1.3391, "step": 414 }, { "epoch": 0.3140964995269631, "grad_norm": 3.2222900390625, "learning_rate": 1.9573732510556772e-05, "loss": 1.2233, "step": 415 }, { "epoch": 0.31485335856196783, "grad_norm": 3.3176467418670654, "learning_rate": 1.9571410452192003e-05, "loss": 1.32, "step": 416 }, { "epoch": 0.31561021759697255, "grad_norm": 2.996213912963867, "learning_rate": 1.9569082224886607e-05, "loss": 1.3158, "step": 417 }, { "epoch": 0.3163670766319773, "grad_norm": 2.757145881652832, "learning_rate": 1.9566747830141327e-05, "loss": 1.2747, "step": 418 }, { "epoch": 0.31712393566698205, "grad_norm": 3.0630686283111572, "learning_rate": 1.9564407269460873e-05, "loss": 1.2863, "step": 419 }, { "epoch": 0.31788079470198677, "grad_norm": 2.979710102081299, "learning_rate": 1.956206054435394e-05, "loss": 1.3017, "step": 420 }, { "epoch": 0.3186376537369915, "grad_norm": 2.9305684566497803, "learning_rate": 1.955970765633319e-05, "loss": 1.2655, "step": 421 }, { "epoch": 0.3193945127719962, "grad_norm": 3.2490427494049072, "learning_rate": 1.955734860691526e-05, "loss": 1.312, "step": 422 }, { "epoch": 0.32015137180700093, "grad_norm": 2.647688388824463, "learning_rate": 1.9554983397620754e-05, "loss": 1.3009, "step": 423 }, { "epoch": 0.3209082308420057, "grad_norm": 2.8015365600585938, "learning_rate": 1.9552612029974246e-05, "loss": 1.3069, "step": 424 }, { "epoch": 0.3216650898770104, "grad_norm": 2.606043577194214, "learning_rate": 1.9550234505504294e-05, "loss": 1.2951, "step": 425 }, { "epoch": 0.32242194891201514, "grad_norm": 2.9746274948120117, "learning_rate": 1.9547850825743407e-05, "loss": 1.2736, "step": 426 }, { "epoch": 0.32317880794701986, "grad_norm": 3.0589208602905273, "learning_rate": 1.9545460992228074e-05, "loss": 1.3242, "step": 427 }, { "epoch": 0.3239356669820246, "grad_norm": 3.041224956512451, "learning_rate": 1.954306500649874e-05, "loss": 1.3397, "step": 428 }, { "epoch": 0.3246925260170293, "grad_norm": 2.700326681137085, "learning_rate": 1.954066287009982e-05, "loss": 1.2822, "step": 429 }, { "epoch": 0.3254493850520341, "grad_norm": 2.7489256858825684, "learning_rate": 1.95382545845797e-05, "loss": 1.3056, "step": 430 }, { "epoch": 0.3262062440870388, "grad_norm": 3.0966339111328125, "learning_rate": 1.953584015149072e-05, "loss": 1.3316, "step": 431 }, { "epoch": 0.3269631031220435, "grad_norm": 2.661102533340454, "learning_rate": 1.9533419572389186e-05, "loss": 1.3017, "step": 432 }, { "epoch": 0.32771996215704824, "grad_norm": 3.1965274810791016, "learning_rate": 1.9530992848835367e-05, "loss": 1.2975, "step": 433 }, { "epoch": 0.32847682119205296, "grad_norm": 3.0282115936279297, "learning_rate": 1.9528559982393497e-05, "loss": 1.3261, "step": 434 }, { "epoch": 0.32923368022705773, "grad_norm": 2.794201374053955, "learning_rate": 1.9526120974631763e-05, "loss": 1.3363, "step": 435 }, { "epoch": 0.32999053926206245, "grad_norm": 2.8009607791900635, "learning_rate": 1.9523675827122305e-05, "loss": 1.2738, "step": 436 }, { "epoch": 0.3307473982970672, "grad_norm": 3.1605050563812256, "learning_rate": 1.952122454144123e-05, "loss": 1.3242, "step": 437 }, { "epoch": 0.3315042573320719, "grad_norm": 2.7758185863494873, "learning_rate": 1.9518767119168608e-05, "loss": 1.2546, "step": 438 }, { "epoch": 0.3322611163670766, "grad_norm": 3.3435556888580322, "learning_rate": 1.9516303561888446e-05, "loss": 1.2966, "step": 439 }, { "epoch": 0.3330179754020814, "grad_norm": 3.4312620162963867, "learning_rate": 1.9513833871188724e-05, "loss": 1.328, "step": 440 }, { "epoch": 0.3337748344370861, "grad_norm": 3.4291491508483887, "learning_rate": 1.951135804866136e-05, "loss": 1.2927, "step": 441 }, { "epoch": 0.33453169347209083, "grad_norm": 2.797574281692505, "learning_rate": 1.9508876095902236e-05, "loss": 1.3218, "step": 442 }, { "epoch": 0.33528855250709555, "grad_norm": 3.1859307289123535, "learning_rate": 1.9506388014511176e-05, "loss": 1.2827, "step": 443 }, { "epoch": 0.33604541154210027, "grad_norm": 3.4026360511779785, "learning_rate": 1.950389380609196e-05, "loss": 1.2879, "step": 444 }, { "epoch": 0.336802270577105, "grad_norm": 3.2964580059051514, "learning_rate": 1.9501393472252324e-05, "loss": 1.2976, "step": 445 }, { "epoch": 0.33755912961210977, "grad_norm": 3.227969169616699, "learning_rate": 1.9498887014603937e-05, "loss": 1.3191, "step": 446 }, { "epoch": 0.3383159886471145, "grad_norm": 4.118795871734619, "learning_rate": 1.949637443476243e-05, "loss": 1.3112, "step": 447 }, { "epoch": 0.3390728476821192, "grad_norm": 3.7260451316833496, "learning_rate": 1.9493855734347367e-05, "loss": 1.2836, "step": 448 }, { "epoch": 0.3398297067171239, "grad_norm": 3.0048820972442627, "learning_rate": 1.9491330914982265e-05, "loss": 1.3106, "step": 449 }, { "epoch": 0.34058656575212864, "grad_norm": 2.7483198642730713, "learning_rate": 1.9488799978294586e-05, "loss": 1.338, "step": 450 }, { "epoch": 0.3413434247871334, "grad_norm": 3.021895170211792, "learning_rate": 1.9486262925915736e-05, "loss": 1.2931, "step": 451 }, { "epoch": 0.34210028382213814, "grad_norm": 2.793663740158081, "learning_rate": 1.948371975948106e-05, "loss": 1.2895, "step": 452 }, { "epoch": 0.34285714285714286, "grad_norm": 3.6046817302703857, "learning_rate": 1.9481170480629835e-05, "loss": 1.326, "step": 453 }, { "epoch": 0.3436140018921476, "grad_norm": 2.8959131240844727, "learning_rate": 1.9478615091005296e-05, "loss": 1.3018, "step": 454 }, { "epoch": 0.3443708609271523, "grad_norm": 2.869874954223633, "learning_rate": 1.9476053592254608e-05, "loss": 1.3181, "step": 455 }, { "epoch": 0.3451277199621571, "grad_norm": 2.9448678493499756, "learning_rate": 1.947348598602887e-05, "loss": 1.2992, "step": 456 }, { "epoch": 0.3458845789971618, "grad_norm": 2.8842031955718994, "learning_rate": 1.9470912273983123e-05, "loss": 1.3297, "step": 457 }, { "epoch": 0.3466414380321665, "grad_norm": 3.329968214035034, "learning_rate": 1.946833245777635e-05, "loss": 1.3074, "step": 458 }, { "epoch": 0.34739829706717124, "grad_norm": 2.8565642833709717, "learning_rate": 1.9465746539071447e-05, "loss": 1.3204, "step": 459 }, { "epoch": 0.34815515610217596, "grad_norm": 3.0529487133026123, "learning_rate": 1.946315451953527e-05, "loss": 1.3249, "step": 460 }, { "epoch": 0.3489120151371807, "grad_norm": 2.988011360168457, "learning_rate": 1.946055640083859e-05, "loss": 1.2612, "step": 461 }, { "epoch": 0.34966887417218545, "grad_norm": 3.3266758918762207, "learning_rate": 1.945795218465611e-05, "loss": 1.3283, "step": 462 }, { "epoch": 0.35042573320719017, "grad_norm": 3.2849862575531006, "learning_rate": 1.945534187266648e-05, "loss": 1.3476, "step": 463 }, { "epoch": 0.3511825922421949, "grad_norm": 2.831113576889038, "learning_rate": 1.945272546655226e-05, "loss": 1.2726, "step": 464 }, { "epoch": 0.3519394512771996, "grad_norm": 3.232224464416504, "learning_rate": 1.9450102967999946e-05, "loss": 1.3362, "step": 465 }, { "epoch": 0.35269631031220433, "grad_norm": 3.704671621322632, "learning_rate": 1.944747437869996e-05, "loss": 1.3011, "step": 466 }, { "epoch": 0.3534531693472091, "grad_norm": 2.6540513038635254, "learning_rate": 1.944483970034665e-05, "loss": 1.3268, "step": 467 }, { "epoch": 0.3542100283822138, "grad_norm": 3.856849431991577, "learning_rate": 1.944219893463829e-05, "loss": 1.2762, "step": 468 }, { "epoch": 0.35496688741721855, "grad_norm": 2.809225082397461, "learning_rate": 1.943955208327708e-05, "loss": 1.2515, "step": 469 }, { "epoch": 0.35572374645222327, "grad_norm": 3.271754503250122, "learning_rate": 1.943689914796914e-05, "loss": 1.313, "step": 470 }, { "epoch": 0.356480605487228, "grad_norm": 2.872096061706543, "learning_rate": 1.9434240130424504e-05, "loss": 1.2762, "step": 471 }, { "epoch": 0.35723746452223276, "grad_norm": 2.9466817378997803, "learning_rate": 1.9431575032357147e-05, "loss": 1.3123, "step": 472 }, { "epoch": 0.3579943235572375, "grad_norm": 3.358745813369751, "learning_rate": 1.9428903855484938e-05, "loss": 1.2684, "step": 473 }, { "epoch": 0.3587511825922422, "grad_norm": 3.3290534019470215, "learning_rate": 1.9426226601529685e-05, "loss": 1.321, "step": 474 }, { "epoch": 0.3595080416272469, "grad_norm": 3.1677582263946533, "learning_rate": 1.9423543272217103e-05, "loss": 1.2994, "step": 475 }, { "epoch": 0.36026490066225164, "grad_norm": 3.891291618347168, "learning_rate": 1.9420853869276822e-05, "loss": 1.2783, "step": 476 }, { "epoch": 0.36102175969725636, "grad_norm": 3.3545546531677246, "learning_rate": 1.9418158394442395e-05, "loss": 1.2985, "step": 477 }, { "epoch": 0.36177861873226114, "grad_norm": 3.187551498413086, "learning_rate": 1.941545684945128e-05, "loss": 1.3401, "step": 478 }, { "epoch": 0.36253547776726586, "grad_norm": 3.063565969467163, "learning_rate": 1.9412749236044855e-05, "loss": 1.2574, "step": 479 }, { "epoch": 0.3632923368022706, "grad_norm": 3.0356266498565674, "learning_rate": 1.9410035555968403e-05, "loss": 1.2734, "step": 480 }, { "epoch": 0.3640491958372753, "grad_norm": 4.256435871124268, "learning_rate": 1.9407315810971123e-05, "loss": 1.2623, "step": 481 }, { "epoch": 0.36480605487228, "grad_norm": 3.298546075820923, "learning_rate": 1.9404590002806122e-05, "loss": 1.3079, "step": 482 }, { "epoch": 0.3655629139072848, "grad_norm": 3.06703782081604, "learning_rate": 1.9401858133230412e-05, "loss": 1.347, "step": 483 }, { "epoch": 0.3663197729422895, "grad_norm": 3.244100332260132, "learning_rate": 1.9399120204004917e-05, "loss": 1.298, "step": 484 }, { "epoch": 0.36707663197729423, "grad_norm": 2.7238996028900146, "learning_rate": 1.9396376216894462e-05, "loss": 1.2434, "step": 485 }, { "epoch": 0.36783349101229895, "grad_norm": 3.0345072746276855, "learning_rate": 1.939362617366778e-05, "loss": 1.3634, "step": 486 }, { "epoch": 0.36859035004730367, "grad_norm": 3.2847676277160645, "learning_rate": 1.9390870076097507e-05, "loss": 1.3037, "step": 487 }, { "epoch": 0.36934720908230845, "grad_norm": 2.8988196849823, "learning_rate": 1.9388107925960183e-05, "loss": 1.3137, "step": 488 }, { "epoch": 0.37010406811731317, "grad_norm": 3.4510462284088135, "learning_rate": 1.9385339725036244e-05, "loss": 1.3042, "step": 489 }, { "epoch": 0.3708609271523179, "grad_norm": 2.7349674701690674, "learning_rate": 1.938256547511003e-05, "loss": 1.3104, "step": 490 }, { "epoch": 0.3716177861873226, "grad_norm": 2.6827545166015625, "learning_rate": 1.9379785177969787e-05, "loss": 1.3312, "step": 491 }, { "epoch": 0.3723746452223273, "grad_norm": 2.871415138244629, "learning_rate": 1.937699883540765e-05, "loss": 1.2999, "step": 492 }, { "epoch": 0.37313150425733205, "grad_norm": 3.261521339416504, "learning_rate": 1.9374206449219646e-05, "loss": 1.3027, "step": 493 }, { "epoch": 0.3738883632923368, "grad_norm": 2.7188801765441895, "learning_rate": 1.9371408021205708e-05, "loss": 1.2688, "step": 494 }, { "epoch": 0.37464522232734154, "grad_norm": 2.762587070465088, "learning_rate": 1.936860355316967e-05, "loss": 1.316, "step": 495 }, { "epoch": 0.37540208136234626, "grad_norm": 3.2157773971557617, "learning_rate": 1.9365793046919233e-05, "loss": 1.2818, "step": 496 }, { "epoch": 0.376158940397351, "grad_norm": 2.720599889755249, "learning_rate": 1.9362976504266017e-05, "loss": 1.2767, "step": 497 }, { "epoch": 0.3769157994323557, "grad_norm": 2.728111982345581, "learning_rate": 1.936015392702552e-05, "loss": 1.3292, "step": 498 }, { "epoch": 0.3776726584673605, "grad_norm": 3.2255611419677734, "learning_rate": 1.9357325317017127e-05, "loss": 1.3165, "step": 499 }, { "epoch": 0.3784295175023652, "grad_norm": 3.559377431869507, "learning_rate": 1.935449067606413e-05, "loss": 1.3284, "step": 500 }, { "epoch": 0.3791863765373699, "grad_norm": 3.0584166049957275, "learning_rate": 1.935165000599368e-05, "loss": 1.2788, "step": 501 }, { "epoch": 0.37994323557237464, "grad_norm": 3.422832727432251, "learning_rate": 1.9348803308636836e-05, "loss": 1.3315, "step": 502 }, { "epoch": 0.38070009460737936, "grad_norm": 3.052250623703003, "learning_rate": 1.9345950585828543e-05, "loss": 1.2772, "step": 503 }, { "epoch": 0.38145695364238413, "grad_norm": 3.164451837539673, "learning_rate": 1.9343091839407608e-05, "loss": 1.2796, "step": 504 }, { "epoch": 0.38221381267738885, "grad_norm": 2.815291166305542, "learning_rate": 1.9340227071216747e-05, "loss": 1.2473, "step": 505 }, { "epoch": 0.3829706717123936, "grad_norm": 2.953880548477173, "learning_rate": 1.9337356283102543e-05, "loss": 1.299, "step": 506 }, { "epoch": 0.3837275307473983, "grad_norm": 2.8606905937194824, "learning_rate": 1.9334479476915462e-05, "loss": 1.3075, "step": 507 }, { "epoch": 0.384484389782403, "grad_norm": 2.8410329818725586, "learning_rate": 1.9331596654509848e-05, "loss": 1.3377, "step": 508 }, { "epoch": 0.38524124881740773, "grad_norm": 3.284508228302002, "learning_rate": 1.9328707817743923e-05, "loss": 1.2549, "step": 509 }, { "epoch": 0.3859981078524125, "grad_norm": 2.892754554748535, "learning_rate": 1.9325812968479793e-05, "loss": 1.292, "step": 510 }, { "epoch": 0.38675496688741723, "grad_norm": 3.1711387634277344, "learning_rate": 1.932291210858343e-05, "loss": 1.2258, "step": 511 }, { "epoch": 0.38751182592242195, "grad_norm": 2.6686558723449707, "learning_rate": 1.932000523992468e-05, "loss": 1.246, "step": 512 }, { "epoch": 0.38826868495742667, "grad_norm": 2.947592258453369, "learning_rate": 1.9317092364377273e-05, "loss": 1.2544, "step": 513 }, { "epoch": 0.3890255439924314, "grad_norm": 3.5232532024383545, "learning_rate": 1.93141734838188e-05, "loss": 1.3041, "step": 514 }, { "epoch": 0.38978240302743616, "grad_norm": 3.5411503314971924, "learning_rate": 1.931124860013073e-05, "loss": 1.2852, "step": 515 }, { "epoch": 0.3905392620624409, "grad_norm": 3.0337717533111572, "learning_rate": 1.93083177151984e-05, "loss": 1.2867, "step": 516 }, { "epoch": 0.3912961210974456, "grad_norm": 3.082791805267334, "learning_rate": 1.9305380830911002e-05, "loss": 1.2981, "step": 517 }, { "epoch": 0.3920529801324503, "grad_norm": 3.1100258827209473, "learning_rate": 1.9302437949161622e-05, "loss": 1.2645, "step": 518 }, { "epoch": 0.39280983916745504, "grad_norm": 3.3867480754852295, "learning_rate": 1.9299489071847185e-05, "loss": 1.3555, "step": 519 }, { "epoch": 0.3935666982024598, "grad_norm": 3.1625607013702393, "learning_rate": 1.9296534200868504e-05, "loss": 1.3111, "step": 520 }, { "epoch": 0.39432355723746454, "grad_norm": 4.334456443786621, "learning_rate": 1.929357333813023e-05, "loss": 1.3102, "step": 521 }, { "epoch": 0.39508041627246926, "grad_norm": 3.650447130203247, "learning_rate": 1.9290606485540903e-05, "loss": 1.3129, "step": 522 }, { "epoch": 0.395837275307474, "grad_norm": 3.8689398765563965, "learning_rate": 1.9287633645012898e-05, "loss": 1.2974, "step": 523 }, { "epoch": 0.3965941343424787, "grad_norm": 3.520089864730835, "learning_rate": 1.9284654818462474e-05, "loss": 1.291, "step": 524 }, { "epoch": 0.3973509933774834, "grad_norm": 4.220740795135498, "learning_rate": 1.9281670007809735e-05, "loss": 1.3039, "step": 525 }, { "epoch": 0.3981078524124882, "grad_norm": 3.871176242828369, "learning_rate": 1.9278679214978637e-05, "loss": 1.2682, "step": 526 }, { "epoch": 0.3988647114474929, "grad_norm": 3.2446093559265137, "learning_rate": 1.9275682441897007e-05, "loss": 1.2866, "step": 527 }, { "epoch": 0.39962157048249763, "grad_norm": 3.475529432296753, "learning_rate": 1.9272679690496517e-05, "loss": 1.344, "step": 528 }, { "epoch": 0.40037842951750235, "grad_norm": 3.29640531539917, "learning_rate": 1.9269670962712695e-05, "loss": 1.3257, "step": 529 }, { "epoch": 0.4011352885525071, "grad_norm": 3.43729305267334, "learning_rate": 1.9266656260484925e-05, "loss": 1.3504, "step": 530 }, { "epoch": 0.40189214758751185, "grad_norm": 3.6663601398468018, "learning_rate": 1.9263635585756424e-05, "loss": 1.2738, "step": 531 }, { "epoch": 0.40264900662251657, "grad_norm": 3.4716086387634277, "learning_rate": 1.9260608940474293e-05, "loss": 1.2997, "step": 532 }, { "epoch": 0.4034058656575213, "grad_norm": 3.0576701164245605, "learning_rate": 1.9257576326589448e-05, "loss": 1.2958, "step": 533 }, { "epoch": 0.404162724692526, "grad_norm": 3.7031450271606445, "learning_rate": 1.9254537746056664e-05, "loss": 1.2537, "step": 534 }, { "epoch": 0.40491958372753073, "grad_norm": 3.070580005645752, "learning_rate": 1.925149320083457e-05, "loss": 1.3362, "step": 535 }, { "epoch": 0.4056764427625355, "grad_norm": 3.241197347640991, "learning_rate": 1.9248442692885634e-05, "loss": 1.2984, "step": 536 }, { "epoch": 0.4064333017975402, "grad_norm": 2.7833101749420166, "learning_rate": 1.9245386224176162e-05, "loss": 1.2589, "step": 537 }, { "epoch": 0.40719016083254495, "grad_norm": 2.8053226470947266, "learning_rate": 1.9242323796676313e-05, "loss": 1.277, "step": 538 }, { "epoch": 0.40794701986754967, "grad_norm": 3.119124412536621, "learning_rate": 1.9239255412360075e-05, "loss": 1.2516, "step": 539 }, { "epoch": 0.4087038789025544, "grad_norm": 3.013762950897217, "learning_rate": 1.923618107320529e-05, "loss": 1.2988, "step": 540 }, { "epoch": 0.4094607379375591, "grad_norm": 2.8327529430389404, "learning_rate": 1.923310078119362e-05, "loss": 1.2596, "step": 541 }, { "epoch": 0.4102175969725639, "grad_norm": 2.7732462882995605, "learning_rate": 1.9230014538310575e-05, "loss": 1.2525, "step": 542 }, { "epoch": 0.4109744560075686, "grad_norm": 2.984377145767212, "learning_rate": 1.9226922346545513e-05, "loss": 1.2688, "step": 543 }, { "epoch": 0.4117313150425733, "grad_norm": 3.146101474761963, "learning_rate": 1.92238242078916e-05, "loss": 1.3291, "step": 544 }, { "epoch": 0.41248817407757804, "grad_norm": 2.911142587661743, "learning_rate": 1.9220720124345855e-05, "loss": 1.2372, "step": 545 }, { "epoch": 0.41324503311258276, "grad_norm": 3.006364345550537, "learning_rate": 1.921761009790912e-05, "loss": 1.2157, "step": 546 }, { "epoch": 0.41400189214758754, "grad_norm": 2.9054133892059326, "learning_rate": 1.9214494130586074e-05, "loss": 1.3591, "step": 547 }, { "epoch": 0.41475875118259226, "grad_norm": 2.9922358989715576, "learning_rate": 1.9211372224385222e-05, "loss": 1.3093, "step": 548 }, { "epoch": 0.415515610217597, "grad_norm": 2.6461005210876465, "learning_rate": 1.9208244381318892e-05, "loss": 1.2585, "step": 549 }, { "epoch": 0.4162724692526017, "grad_norm": 2.7143542766571045, "learning_rate": 1.9205110603403247e-05, "loss": 1.2594, "step": 550 }, { "epoch": 0.4170293282876064, "grad_norm": 2.9333744049072266, "learning_rate": 1.9201970892658273e-05, "loss": 1.3178, "step": 551 }, { "epoch": 0.4177861873226112, "grad_norm": 2.956841230392456, "learning_rate": 1.919882525110778e-05, "loss": 1.2745, "step": 552 }, { "epoch": 0.4185430463576159, "grad_norm": 3.0672903060913086, "learning_rate": 1.91956736807794e-05, "loss": 1.2648, "step": 553 }, { "epoch": 0.41929990539262063, "grad_norm": 2.7969796657562256, "learning_rate": 1.9192516183704587e-05, "loss": 1.3154, "step": 554 }, { "epoch": 0.42005676442762535, "grad_norm": 2.9009835720062256, "learning_rate": 1.9189352761918616e-05, "loss": 1.2412, "step": 555 }, { "epoch": 0.42081362346263007, "grad_norm": 2.8731672763824463, "learning_rate": 1.918618341746058e-05, "loss": 1.2811, "step": 556 }, { "epoch": 0.4215704824976348, "grad_norm": 2.7065563201904297, "learning_rate": 1.918300815237339e-05, "loss": 1.2895, "step": 557 }, { "epoch": 0.42232734153263957, "grad_norm": 2.670109748840332, "learning_rate": 1.9179826968703775e-05, "loss": 1.2809, "step": 558 }, { "epoch": 0.4230842005676443, "grad_norm": 2.9249067306518555, "learning_rate": 1.9176639868502273e-05, "loss": 1.3528, "step": 559 }, { "epoch": 0.423841059602649, "grad_norm": 2.733651638031006, "learning_rate": 1.917344685382325e-05, "loss": 1.2516, "step": 560 }, { "epoch": 0.4245979186376537, "grad_norm": 3.126077651977539, "learning_rate": 1.9170247926724863e-05, "loss": 1.3048, "step": 561 }, { "epoch": 0.42535477767265845, "grad_norm": 3.024705648422241, "learning_rate": 1.9167043089269096e-05, "loss": 1.2871, "step": 562 }, { "epoch": 0.4261116367076632, "grad_norm": 3.0809972286224365, "learning_rate": 1.916383234352174e-05, "loss": 1.2939, "step": 563 }, { "epoch": 0.42686849574266794, "grad_norm": 2.8006155490875244, "learning_rate": 1.9160615691552388e-05, "loss": 1.2681, "step": 564 }, { "epoch": 0.42762535477767266, "grad_norm": 3.146348714828491, "learning_rate": 1.915739313543445e-05, "loss": 1.299, "step": 565 }, { "epoch": 0.4283822138126774, "grad_norm": 2.707672119140625, "learning_rate": 1.915416467724514e-05, "loss": 1.305, "step": 566 }, { "epoch": 0.4291390728476821, "grad_norm": 3.0839362144470215, "learning_rate": 1.9150930319065465e-05, "loss": 1.2806, "step": 567 }, { "epoch": 0.4298959318826869, "grad_norm": 2.6987831592559814, "learning_rate": 1.9147690062980243e-05, "loss": 1.2449, "step": 568 }, { "epoch": 0.4306527909176916, "grad_norm": 3.5137927532196045, "learning_rate": 1.9144443911078098e-05, "loss": 1.2525, "step": 569 }, { "epoch": 0.4314096499526963, "grad_norm": 2.656526803970337, "learning_rate": 1.914119186545145e-05, "loss": 1.2801, "step": 570 }, { "epoch": 0.43216650898770104, "grad_norm": 2.7091798782348633, "learning_rate": 1.9137933928196514e-05, "loss": 1.2743, "step": 571 }, { "epoch": 0.43292336802270576, "grad_norm": 2.6860084533691406, "learning_rate": 1.913467010141331e-05, "loss": 1.2569, "step": 572 }, { "epoch": 0.4336802270577105, "grad_norm": 2.8987984657287598, "learning_rate": 1.9131400387205653e-05, "loss": 1.2411, "step": 573 }, { "epoch": 0.43443708609271525, "grad_norm": 2.579749584197998, "learning_rate": 1.9128124787681145e-05, "loss": 1.2344, "step": 574 }, { "epoch": 0.43519394512771997, "grad_norm": 2.835766553878784, "learning_rate": 1.912484330495119e-05, "loss": 1.2922, "step": 575 }, { "epoch": 0.4359508041627247, "grad_norm": 3.549691915512085, "learning_rate": 1.9121555941130986e-05, "loss": 1.2908, "step": 576 }, { "epoch": 0.4367076631977294, "grad_norm": 2.881730556488037, "learning_rate": 1.911826269833951e-05, "loss": 1.2787, "step": 577 }, { "epoch": 0.43746452223273413, "grad_norm": 2.881334066390991, "learning_rate": 1.9114963578699538e-05, "loss": 1.3111, "step": 578 }, { "epoch": 0.4382213812677389, "grad_norm": 2.941556453704834, "learning_rate": 1.911165858433764e-05, "loss": 1.2857, "step": 579 }, { "epoch": 0.4389782403027436, "grad_norm": 2.6916472911834717, "learning_rate": 1.9108347717384156e-05, "loss": 1.2512, "step": 580 }, { "epoch": 0.43973509933774835, "grad_norm": 3.0234310626983643, "learning_rate": 1.9105030979973223e-05, "loss": 1.2089, "step": 581 }, { "epoch": 0.44049195837275307, "grad_norm": 2.7675161361694336, "learning_rate": 1.9101708374242764e-05, "loss": 1.3253, "step": 582 }, { "epoch": 0.4412488174077578, "grad_norm": 2.746612310409546, "learning_rate": 1.909837990233447e-05, "loss": 1.2554, "step": 583 }, { "epoch": 0.44200567644276256, "grad_norm": 2.629913091659546, "learning_rate": 1.9095045566393834e-05, "loss": 1.3158, "step": 584 }, { "epoch": 0.4427625354777673, "grad_norm": 3.0382394790649414, "learning_rate": 1.909170536857011e-05, "loss": 1.3382, "step": 585 }, { "epoch": 0.443519394512772, "grad_norm": 3.1332645416259766, "learning_rate": 1.908835931101634e-05, "loss": 1.2561, "step": 586 }, { "epoch": 0.4442762535477767, "grad_norm": 2.91369891166687, "learning_rate": 1.9085007395889342e-05, "loss": 1.287, "step": 587 }, { "epoch": 0.44503311258278144, "grad_norm": 2.6690382957458496, "learning_rate": 1.9081649625349715e-05, "loss": 1.275, "step": 588 }, { "epoch": 0.44578997161778616, "grad_norm": 2.7576904296875, "learning_rate": 1.9078286001561822e-05, "loss": 1.2669, "step": 589 }, { "epoch": 0.44654683065279094, "grad_norm": 2.731320381164551, "learning_rate": 1.9074916526693804e-05, "loss": 1.292, "step": 590 }, { "epoch": 0.44730368968779566, "grad_norm": 2.6240909099578857, "learning_rate": 1.9071541202917572e-05, "loss": 1.2852, "step": 591 }, { "epoch": 0.4480605487228004, "grad_norm": 2.8189620971679688, "learning_rate": 1.906816003240881e-05, "loss": 1.2655, "step": 592 }, { "epoch": 0.4488174077578051, "grad_norm": 2.7323951721191406, "learning_rate": 1.906477301734697e-05, "loss": 1.2942, "step": 593 }, { "epoch": 0.4495742667928098, "grad_norm": 2.8606555461883545, "learning_rate": 1.9061380159915262e-05, "loss": 1.3039, "step": 594 }, { "epoch": 0.4503311258278146, "grad_norm": 2.7523887157440186, "learning_rate": 1.9057981462300683e-05, "loss": 1.2372, "step": 595 }, { "epoch": 0.4510879848628193, "grad_norm": 3.1251001358032227, "learning_rate": 1.9054576926693977e-05, "loss": 1.2726, "step": 596 }, { "epoch": 0.45184484389782403, "grad_norm": 3.1092488765716553, "learning_rate": 1.9051166555289652e-05, "loss": 1.3126, "step": 597 }, { "epoch": 0.45260170293282875, "grad_norm": 2.722238302230835, "learning_rate": 1.904775035028598e-05, "loss": 1.2765, "step": 598 }, { "epoch": 0.4533585619678335, "grad_norm": 3.9474592208862305, "learning_rate": 1.9044328313885e-05, "loss": 1.2389, "step": 599 }, { "epoch": 0.45411542100283825, "grad_norm": 2.7783472537994385, "learning_rate": 1.90409004482925e-05, "loss": 1.2683, "step": 600 }, { "epoch": 0.45487228003784297, "grad_norm": 2.7635014057159424, "learning_rate": 1.9037466755718038e-05, "loss": 1.3073, "step": 601 }, { "epoch": 0.4556291390728477, "grad_norm": 2.899637222290039, "learning_rate": 1.903402723837491e-05, "loss": 1.2682, "step": 602 }, { "epoch": 0.4563859981078524, "grad_norm": 2.5725064277648926, "learning_rate": 1.9030581898480182e-05, "loss": 1.2445, "step": 603 }, { "epoch": 0.45714285714285713, "grad_norm": 2.767765760421753, "learning_rate": 1.902713073825467e-05, "loss": 1.3006, "step": 604 }, { "epoch": 0.45789971617786185, "grad_norm": 2.7437305450439453, "learning_rate": 1.902367375992293e-05, "loss": 1.256, "step": 605 }, { "epoch": 0.4586565752128666, "grad_norm": 2.764497756958008, "learning_rate": 1.9020210965713287e-05, "loss": 1.2316, "step": 606 }, { "epoch": 0.45941343424787134, "grad_norm": 2.6510708332061768, "learning_rate": 1.9016742357857802e-05, "loss": 1.2413, "step": 607 }, { "epoch": 0.46017029328287606, "grad_norm": 2.727973699569702, "learning_rate": 1.9013267938592282e-05, "loss": 1.2779, "step": 608 }, { "epoch": 0.4609271523178808, "grad_norm": 2.7336103916168213, "learning_rate": 1.900978771015629e-05, "loss": 1.3133, "step": 609 }, { "epoch": 0.4616840113528855, "grad_norm": 2.635427713394165, "learning_rate": 1.9006301674793128e-05, "loss": 1.233, "step": 610 }, { "epoch": 0.4624408703878903, "grad_norm": 2.99351167678833, "learning_rate": 1.900280983474984e-05, "loss": 1.2353, "step": 611 }, { "epoch": 0.463197729422895, "grad_norm": 3.155054807662964, "learning_rate": 1.8999312192277217e-05, "loss": 1.3258, "step": 612 }, { "epoch": 0.4639545884578997, "grad_norm": 2.745626926422119, "learning_rate": 1.8995808749629773e-05, "loss": 1.2321, "step": 613 }, { "epoch": 0.46471144749290444, "grad_norm": 2.662928819656372, "learning_rate": 1.899229950906579e-05, "loss": 1.2291, "step": 614 }, { "epoch": 0.46546830652790916, "grad_norm": 2.684296131134033, "learning_rate": 1.8988784472847262e-05, "loss": 1.2575, "step": 615 }, { "epoch": 0.46622516556291393, "grad_norm": 2.850404977798462, "learning_rate": 1.8985263643239932e-05, "loss": 1.2727, "step": 616 }, { "epoch": 0.46698202459791865, "grad_norm": 2.8185768127441406, "learning_rate": 1.8981737022513268e-05, "loss": 1.2145, "step": 617 }, { "epoch": 0.4677388836329234, "grad_norm": 2.865675449371338, "learning_rate": 1.8978204612940476e-05, "loss": 1.2602, "step": 618 }, { "epoch": 0.4684957426679281, "grad_norm": 2.706779718399048, "learning_rate": 1.8974666416798496e-05, "loss": 1.2578, "step": 619 }, { "epoch": 0.4692526017029328, "grad_norm": 2.7865641117095947, "learning_rate": 1.8971122436368002e-05, "loss": 1.2549, "step": 620 }, { "epoch": 0.47000946073793753, "grad_norm": 3.0289227962493896, "learning_rate": 1.8967572673933373e-05, "loss": 1.2794, "step": 621 }, { "epoch": 0.4707663197729423, "grad_norm": 2.986976146697998, "learning_rate": 1.8964017131782748e-05, "loss": 1.2666, "step": 622 }, { "epoch": 0.47152317880794703, "grad_norm": 2.907590866088867, "learning_rate": 1.896045581220797e-05, "loss": 1.3149, "step": 623 }, { "epoch": 0.47228003784295175, "grad_norm": 2.5124711990356445, "learning_rate": 1.8956888717504607e-05, "loss": 1.2692, "step": 624 }, { "epoch": 0.47303689687795647, "grad_norm": 2.8450794219970703, "learning_rate": 1.8953315849971956e-05, "loss": 1.2385, "step": 625 }, { "epoch": 0.4737937559129612, "grad_norm": 3.127713441848755, "learning_rate": 1.8949737211913038e-05, "loss": 1.2725, "step": 626 }, { "epoch": 0.47455061494796597, "grad_norm": 3.0674550533294678, "learning_rate": 1.894615280563458e-05, "loss": 1.3016, "step": 627 }, { "epoch": 0.4753074739829707, "grad_norm": 3.29008150100708, "learning_rate": 1.894256263344704e-05, "loss": 1.2382, "step": 628 }, { "epoch": 0.4760643330179754, "grad_norm": 3.2081003189086914, "learning_rate": 1.8938966697664592e-05, "loss": 1.259, "step": 629 }, { "epoch": 0.4768211920529801, "grad_norm": 2.922011613845825, "learning_rate": 1.8935365000605116e-05, "loss": 1.3017, "step": 630 }, { "epoch": 0.47757805108798485, "grad_norm": 3.075958490371704, "learning_rate": 1.893175754459021e-05, "loss": 1.2595, "step": 631 }, { "epoch": 0.4783349101229896, "grad_norm": 2.9022579193115234, "learning_rate": 1.892814433194519e-05, "loss": 1.3033, "step": 632 }, { "epoch": 0.47909176915799434, "grad_norm": 2.9433717727661133, "learning_rate": 1.8924525364999077e-05, "loss": 1.2636, "step": 633 }, { "epoch": 0.47984862819299906, "grad_norm": 2.9550983905792236, "learning_rate": 1.89209006460846e-05, "loss": 1.2936, "step": 634 }, { "epoch": 0.4806054872280038, "grad_norm": 2.8603897094726562, "learning_rate": 1.8917270177538198e-05, "loss": 1.2497, "step": 635 }, { "epoch": 0.4813623462630085, "grad_norm": 3.0159318447113037, "learning_rate": 1.8913633961700014e-05, "loss": 1.2627, "step": 636 }, { "epoch": 0.4821192052980132, "grad_norm": 3.3943378925323486, "learning_rate": 1.8909992000913896e-05, "loss": 1.2977, "step": 637 }, { "epoch": 0.482876064333018, "grad_norm": 2.8387339115142822, "learning_rate": 1.8906344297527403e-05, "loss": 1.2922, "step": 638 }, { "epoch": 0.4836329233680227, "grad_norm": 2.8385610580444336, "learning_rate": 1.8902690853891787e-05, "loss": 1.2023, "step": 639 }, { "epoch": 0.48438978240302744, "grad_norm": 3.155811309814453, "learning_rate": 1.8899031672362e-05, "loss": 1.3069, "step": 640 }, { "epoch": 0.48514664143803216, "grad_norm": 3.442098617553711, "learning_rate": 1.8895366755296693e-05, "loss": 1.2361, "step": 641 }, { "epoch": 0.4859035004730369, "grad_norm": 2.805680751800537, "learning_rate": 1.8891696105058218e-05, "loss": 1.2349, "step": 642 }, { "epoch": 0.48666035950804165, "grad_norm": 2.7870709896087646, "learning_rate": 1.8888019724012618e-05, "loss": 1.3326, "step": 643 }, { "epoch": 0.48741721854304637, "grad_norm": 2.8645455837249756, "learning_rate": 1.8884337614529636e-05, "loss": 1.2829, "step": 644 }, { "epoch": 0.4881740775780511, "grad_norm": 2.8770759105682373, "learning_rate": 1.88806497789827e-05, "loss": 1.2268, "step": 645 }, { "epoch": 0.4889309366130558, "grad_norm": 2.8018059730529785, "learning_rate": 1.8876956219748934e-05, "loss": 1.2566, "step": 646 }, { "epoch": 0.48968779564806053, "grad_norm": 3.0624117851257324, "learning_rate": 1.887325693920915e-05, "loss": 1.2776, "step": 647 }, { "epoch": 0.4904446546830653, "grad_norm": 2.7411904335021973, "learning_rate": 1.886955193974785e-05, "loss": 1.2941, "step": 648 }, { "epoch": 0.49120151371807, "grad_norm": 2.4694104194641113, "learning_rate": 1.8865841223753216e-05, "loss": 1.245, "step": 649 }, { "epoch": 0.49195837275307475, "grad_norm": 2.4889931678771973, "learning_rate": 1.886212479361712e-05, "loss": 1.2664, "step": 650 }, { "epoch": 0.49271523178807947, "grad_norm": 2.699221134185791, "learning_rate": 1.885840265173512e-05, "loss": 1.245, "step": 651 }, { "epoch": 0.4934720908230842, "grad_norm": 3.0901527404785156, "learning_rate": 1.8854674800506447e-05, "loss": 1.2683, "step": 652 }, { "epoch": 0.4942289498580889, "grad_norm": 2.5710549354553223, "learning_rate": 1.8850941242334024e-05, "loss": 1.2677, "step": 653 }, { "epoch": 0.4949858088930937, "grad_norm": 2.747673988342285, "learning_rate": 1.8847201979624433e-05, "loss": 1.2487, "step": 654 }, { "epoch": 0.4957426679280984, "grad_norm": 2.6453075408935547, "learning_rate": 1.8843457014787954e-05, "loss": 1.2534, "step": 655 }, { "epoch": 0.4964995269631031, "grad_norm": 2.3280134201049805, "learning_rate": 1.8839706350238537e-05, "loss": 1.2529, "step": 656 }, { "epoch": 0.49725638599810784, "grad_norm": 2.353527307510376, "learning_rate": 1.88359499883938e-05, "loss": 1.2612, "step": 657 }, { "epoch": 0.49801324503311256, "grad_norm": 2.827341318130493, "learning_rate": 1.8832187931675036e-05, "loss": 1.2883, "step": 658 }, { "epoch": 0.49877010406811734, "grad_norm": 2.620957374572754, "learning_rate": 1.882842018250721e-05, "loss": 1.25, "step": 659 }, { "epoch": 0.49952696310312206, "grad_norm": 2.600372076034546, "learning_rate": 1.8824646743318955e-05, "loss": 1.2497, "step": 660 }, { "epoch": 0.5002838221381267, "grad_norm": 2.544832706451416, "learning_rate": 1.882086761654257e-05, "loss": 1.2656, "step": 661 }, { "epoch": 0.5010406811731315, "grad_norm": 2.809065818786621, "learning_rate": 1.881708280461403e-05, "loss": 1.3098, "step": 662 }, { "epoch": 0.5017975402081363, "grad_norm": 2.423124313354492, "learning_rate": 1.881329230997296e-05, "loss": 1.2676, "step": 663 }, { "epoch": 0.5025543992431409, "grad_norm": 2.6886796951293945, "learning_rate": 1.880949613506266e-05, "loss": 1.2764, "step": 664 }, { "epoch": 0.5033112582781457, "grad_norm": 2.9043877124786377, "learning_rate": 1.8805694282330076e-05, "loss": 1.2499, "step": 665 }, { "epoch": 0.5040681173131504, "grad_norm": 2.5381906032562256, "learning_rate": 1.880188675422584e-05, "loss": 1.2429, "step": 666 }, { "epoch": 0.5048249763481552, "grad_norm": 2.5368845462799072, "learning_rate": 1.8798073553204216e-05, "loss": 1.2992, "step": 667 }, { "epoch": 0.5055818353831599, "grad_norm": 2.313969850540161, "learning_rate": 1.879425468172314e-05, "loss": 1.2602, "step": 668 }, { "epoch": 0.5063386944181646, "grad_norm": 2.473052978515625, "learning_rate": 1.8790430142244192e-05, "loss": 1.2558, "step": 669 }, { "epoch": 0.5070955534531694, "grad_norm": 2.5860140323638916, "learning_rate": 1.878659993723262e-05, "loss": 1.2489, "step": 670 }, { "epoch": 0.507852412488174, "grad_norm": 2.7334864139556885, "learning_rate": 1.8782764069157307e-05, "loss": 1.2892, "step": 671 }, { "epoch": 0.5086092715231788, "grad_norm": 2.7741503715515137, "learning_rate": 1.8778922540490803e-05, "loss": 1.214, "step": 672 }, { "epoch": 0.5093661305581836, "grad_norm": 2.3246145248413086, "learning_rate": 1.8775075353709294e-05, "loss": 1.2301, "step": 673 }, { "epoch": 0.5101229895931882, "grad_norm": 2.879974365234375, "learning_rate": 1.8771222511292622e-05, "loss": 1.2351, "step": 674 }, { "epoch": 0.510879848628193, "grad_norm": 2.5754384994506836, "learning_rate": 1.8767364015724266e-05, "loss": 1.2701, "step": 675 }, { "epoch": 0.5116367076631977, "grad_norm": 2.623716115951538, "learning_rate": 1.8763499869491356e-05, "loss": 1.2934, "step": 676 }, { "epoch": 0.5123935666982025, "grad_norm": 2.6354804039001465, "learning_rate": 1.8759630075084664e-05, "loss": 1.2454, "step": 677 }, { "epoch": 0.5131504257332072, "grad_norm": 2.550604820251465, "learning_rate": 1.8755754634998593e-05, "loss": 1.2555, "step": 678 }, { "epoch": 0.5139072847682119, "grad_norm": 2.5519111156463623, "learning_rate": 1.8751873551731196e-05, "loss": 1.2384, "step": 679 }, { "epoch": 0.5146641438032167, "grad_norm": 2.6348938941955566, "learning_rate": 1.8747986827784167e-05, "loss": 1.2453, "step": 680 }, { "epoch": 0.5154210028382213, "grad_norm": 2.5110082626342773, "learning_rate": 1.874409446566282e-05, "loss": 1.3047, "step": 681 }, { "epoch": 0.5161778618732261, "grad_norm": 2.5216503143310547, "learning_rate": 1.8740196467876114e-05, "loss": 1.2464, "step": 682 }, { "epoch": 0.5169347209082309, "grad_norm": 2.737325668334961, "learning_rate": 1.8736292836936643e-05, "loss": 1.2666, "step": 683 }, { "epoch": 0.5176915799432356, "grad_norm": 2.625519037246704, "learning_rate": 1.8732383575360625e-05, "loss": 1.2403, "step": 684 }, { "epoch": 0.5184484389782403, "grad_norm": 2.784569263458252, "learning_rate": 1.8728468685667914e-05, "loss": 1.2627, "step": 685 }, { "epoch": 0.519205298013245, "grad_norm": 2.7349774837493896, "learning_rate": 1.8724548170381983e-05, "loss": 1.2771, "step": 686 }, { "epoch": 0.5199621570482498, "grad_norm": 2.681603193283081, "learning_rate": 1.8720622032029936e-05, "loss": 1.276, "step": 687 }, { "epoch": 0.5207190160832545, "grad_norm": 2.767359972000122, "learning_rate": 1.8716690273142504e-05, "loss": 1.2279, "step": 688 }, { "epoch": 0.5214758751182592, "grad_norm": 2.5928122997283936, "learning_rate": 1.871275289625404e-05, "loss": 1.2568, "step": 689 }, { "epoch": 0.522232734153264, "grad_norm": 2.6970558166503906, "learning_rate": 1.8708809903902517e-05, "loss": 1.3101, "step": 690 }, { "epoch": 0.5229895931882687, "grad_norm": 2.6737709045410156, "learning_rate": 1.8704861298629524e-05, "loss": 1.2575, "step": 691 }, { "epoch": 0.5237464522232734, "grad_norm": 3.0363659858703613, "learning_rate": 1.870090708298028e-05, "loss": 1.3034, "step": 692 }, { "epoch": 0.5245033112582781, "grad_norm": 2.817183017730713, "learning_rate": 1.8696947259503603e-05, "loss": 1.2962, "step": 693 }, { "epoch": 0.5252601702932829, "grad_norm": 3.507577896118164, "learning_rate": 1.8692981830751937e-05, "loss": 1.2643, "step": 694 }, { "epoch": 0.5260170293282876, "grad_norm": 2.9019994735717773, "learning_rate": 1.868901079928134e-05, "loss": 1.2968, "step": 695 }, { "epoch": 0.5267738883632923, "grad_norm": 2.6820502281188965, "learning_rate": 1.8685034167651477e-05, "loss": 1.281, "step": 696 }, { "epoch": 0.5275307473982971, "grad_norm": 2.5685501098632812, "learning_rate": 1.8681051938425626e-05, "loss": 1.2368, "step": 697 }, { "epoch": 0.5282876064333017, "grad_norm": 2.943498134613037, "learning_rate": 1.867706411417067e-05, "loss": 1.2494, "step": 698 }, { "epoch": 0.5290444654683065, "grad_norm": 2.9893808364868164, "learning_rate": 1.8673070697457097e-05, "loss": 1.3033, "step": 699 }, { "epoch": 0.5298013245033113, "grad_norm": 3.192913293838501, "learning_rate": 1.8669071690859002e-05, "loss": 1.3122, "step": 700 }, { "epoch": 0.530558183538316, "grad_norm": 2.6208715438842773, "learning_rate": 1.866506709695409e-05, "loss": 1.2335, "step": 701 }, { "epoch": 0.5313150425733207, "grad_norm": 2.793226718902588, "learning_rate": 1.8661056918323654e-05, "loss": 1.2721, "step": 702 }, { "epoch": 0.5320719016083254, "grad_norm": 2.809190034866333, "learning_rate": 1.8657041157552597e-05, "loss": 1.2318, "step": 703 }, { "epoch": 0.5328287606433302, "grad_norm": 2.70646595954895, "learning_rate": 1.865301981722942e-05, "loss": 1.2471, "step": 704 }, { "epoch": 0.533585619678335, "grad_norm": 2.691943407058716, "learning_rate": 1.864899289994621e-05, "loss": 1.2765, "step": 705 }, { "epoch": 0.5343424787133396, "grad_norm": 2.6376893520355225, "learning_rate": 1.864496040829867e-05, "loss": 1.2932, "step": 706 }, { "epoch": 0.5350993377483444, "grad_norm": 2.727936029434204, "learning_rate": 1.8640922344886066e-05, "loss": 1.2056, "step": 707 }, { "epoch": 0.5358561967833491, "grad_norm": 2.599090337753296, "learning_rate": 1.863687871231128e-05, "loss": 1.2747, "step": 708 }, { "epoch": 0.5366130558183538, "grad_norm": 2.9305431842803955, "learning_rate": 1.863282951318078e-05, "loss": 1.2593, "step": 709 }, { "epoch": 0.5373699148533586, "grad_norm": 2.5242085456848145, "learning_rate": 1.8628774750104615e-05, "loss": 1.2669, "step": 710 }, { "epoch": 0.5381267738883633, "grad_norm": 2.737729787826538, "learning_rate": 1.862471442569642e-05, "loss": 1.2515, "step": 711 }, { "epoch": 0.538883632923368, "grad_norm": 2.8515143394470215, "learning_rate": 1.8620648542573423e-05, "loss": 1.2483, "step": 712 }, { "epoch": 0.5396404919583727, "grad_norm": 2.8016417026519775, "learning_rate": 1.8616577103356425e-05, "loss": 1.2389, "step": 713 }, { "epoch": 0.5403973509933775, "grad_norm": 2.9451699256896973, "learning_rate": 1.861250011066982e-05, "loss": 1.2345, "step": 714 }, { "epoch": 0.5411542100283823, "grad_norm": 2.771279811859131, "learning_rate": 1.8608417567141572e-05, "loss": 1.2621, "step": 715 }, { "epoch": 0.5419110690633869, "grad_norm": 2.9805190563201904, "learning_rate": 1.860432947540322e-05, "loss": 1.2348, "step": 716 }, { "epoch": 0.5426679280983917, "grad_norm": 2.803847312927246, "learning_rate": 1.8600235838089896e-05, "loss": 1.241, "step": 717 }, { "epoch": 0.5434247871333964, "grad_norm": 2.4871954917907715, "learning_rate": 1.859613665784029e-05, "loss": 1.2883, "step": 718 }, { "epoch": 0.5441816461684011, "grad_norm": 3.067754030227661, "learning_rate": 1.8592031937296673e-05, "loss": 1.2833, "step": 719 }, { "epoch": 0.5449385052034059, "grad_norm": 2.8348135948181152, "learning_rate": 1.8587921679104887e-05, "loss": 1.3083, "step": 720 }, { "epoch": 0.5456953642384106, "grad_norm": 2.538663387298584, "learning_rate": 1.8583805885914345e-05, "loss": 1.2288, "step": 721 }, { "epoch": 0.5464522232734154, "grad_norm": 2.7975425720214844, "learning_rate": 1.857968456037801e-05, "loss": 1.3166, "step": 722 }, { "epoch": 0.54720908230842, "grad_norm": 2.60284423828125, "learning_rate": 1.857555770515244e-05, "loss": 1.251, "step": 723 }, { "epoch": 0.5479659413434248, "grad_norm": 3.0047545433044434, "learning_rate": 1.857142532289774e-05, "loss": 1.2372, "step": 724 }, { "epoch": 0.5487228003784295, "grad_norm": 2.7439827919006348, "learning_rate": 1.8567287416277576e-05, "loss": 1.2686, "step": 725 }, { "epoch": 0.5494796594134342, "grad_norm": 2.7966012954711914, "learning_rate": 1.856314398795918e-05, "loss": 1.2997, "step": 726 }, { "epoch": 0.550236518448439, "grad_norm": 2.4072394371032715, "learning_rate": 1.855899504061335e-05, "loss": 1.2371, "step": 727 }, { "epoch": 0.5509933774834437, "grad_norm": 2.6710758209228516, "learning_rate": 1.8554840576914425e-05, "loss": 1.3084, "step": 728 }, { "epoch": 0.5517502365184485, "grad_norm": 2.4834091663360596, "learning_rate": 1.8550680599540315e-05, "loss": 1.2335, "step": 729 }, { "epoch": 0.5525070955534531, "grad_norm": 3.0747454166412354, "learning_rate": 1.8546515111172475e-05, "loss": 1.2691, "step": 730 }, { "epoch": 0.5532639545884579, "grad_norm": 2.3881189823150635, "learning_rate": 1.8542344114495918e-05, "loss": 1.2852, "step": 731 }, { "epoch": 0.5540208136234627, "grad_norm": 2.559795618057251, "learning_rate": 1.85381676121992e-05, "loss": 1.2266, "step": 732 }, { "epoch": 0.5547776726584673, "grad_norm": 2.5426385402679443, "learning_rate": 1.8533985606974436e-05, "loss": 1.2136, "step": 733 }, { "epoch": 0.5555345316934721, "grad_norm": 2.7627816200256348, "learning_rate": 1.8529798101517283e-05, "loss": 1.227, "step": 734 }, { "epoch": 0.5562913907284768, "grad_norm": 3.559936285018921, "learning_rate": 1.8525605098526935e-05, "loss": 1.2823, "step": 735 }, { "epoch": 0.5570482497634816, "grad_norm": 2.6380114555358887, "learning_rate": 1.8521406600706146e-05, "loss": 1.2077, "step": 736 }, { "epoch": 0.5578051087984863, "grad_norm": 2.3080461025238037, "learning_rate": 1.8517202610761203e-05, "loss": 1.2146, "step": 737 }, { "epoch": 0.558561967833491, "grad_norm": 2.245431423187256, "learning_rate": 1.851299313140193e-05, "loss": 1.2073, "step": 738 }, { "epoch": 0.5593188268684958, "grad_norm": 2.4832706451416016, "learning_rate": 1.8508778165341697e-05, "loss": 1.2167, "step": 739 }, { "epoch": 0.5600756859035004, "grad_norm": 2.646280288696289, "learning_rate": 1.85045577152974e-05, "loss": 1.2379, "step": 740 }, { "epoch": 0.5608325449385052, "grad_norm": 2.449310302734375, "learning_rate": 1.8500331783989486e-05, "loss": 1.2085, "step": 741 }, { "epoch": 0.56158940397351, "grad_norm": 2.7046239376068115, "learning_rate": 1.8496100374141924e-05, "loss": 1.2255, "step": 742 }, { "epoch": 0.5623462630085146, "grad_norm": 2.5250003337860107, "learning_rate": 1.849186348848221e-05, "loss": 1.2028, "step": 743 }, { "epoch": 0.5631031220435194, "grad_norm": 2.423783779144287, "learning_rate": 1.848762112974138e-05, "loss": 1.2485, "step": 744 }, { "epoch": 0.5638599810785241, "grad_norm": 2.3143739700317383, "learning_rate": 1.8483373300653995e-05, "loss": 1.2238, "step": 745 }, { "epoch": 0.5646168401135289, "grad_norm": 2.433070421218872, "learning_rate": 1.8479120003958136e-05, "loss": 1.2496, "step": 746 }, { "epoch": 0.5653736991485336, "grad_norm": 2.5320703983306885, "learning_rate": 1.8474861242395424e-05, "loss": 1.2477, "step": 747 }, { "epoch": 0.5661305581835383, "grad_norm": 5.566840171813965, "learning_rate": 1.8470597018710976e-05, "loss": 1.2086, "step": 748 }, { "epoch": 0.5668874172185431, "grad_norm": 2.5624606609344482, "learning_rate": 1.8466327335653458e-05, "loss": 1.2458, "step": 749 }, { "epoch": 0.5676442762535477, "grad_norm": 3.781528949737549, "learning_rate": 1.846205219597504e-05, "loss": 1.259, "step": 750 }, { "epoch": 0.5684011352885525, "grad_norm": 2.4453654289245605, "learning_rate": 1.8457771602431406e-05, "loss": 1.2511, "step": 751 }, { "epoch": 0.5691579943235573, "grad_norm": 2.4234702587127686, "learning_rate": 1.8453485557781768e-05, "loss": 1.2339, "step": 752 }, { "epoch": 0.569914853358562, "grad_norm": 2.637007236480713, "learning_rate": 1.8449194064788845e-05, "loss": 1.2274, "step": 753 }, { "epoch": 0.5706717123935667, "grad_norm": 2.557408332824707, "learning_rate": 1.8444897126218865e-05, "loss": 1.2718, "step": 754 }, { "epoch": 0.5714285714285714, "grad_norm": 2.3460357189178467, "learning_rate": 1.8440594744841564e-05, "loss": 1.2522, "step": 755 }, { "epoch": 0.5721854304635762, "grad_norm": 2.9702370166778564, "learning_rate": 1.84362869234302e-05, "loss": 1.2365, "step": 756 }, { "epoch": 0.5729422894985808, "grad_norm": 2.4645347595214844, "learning_rate": 1.843197366476153e-05, "loss": 1.2497, "step": 757 }, { "epoch": 0.5736991485335856, "grad_norm": 2.525984764099121, "learning_rate": 1.8427654971615804e-05, "loss": 1.2472, "step": 758 }, { "epoch": 0.5744560075685904, "grad_norm": 2.598914861679077, "learning_rate": 1.8423330846776797e-05, "loss": 1.2783, "step": 759 }, { "epoch": 0.575212866603595, "grad_norm": 2.464893341064453, "learning_rate": 1.841900129303177e-05, "loss": 1.2331, "step": 760 }, { "epoch": 0.5759697256385998, "grad_norm": 2.517779588699341, "learning_rate": 1.8414666313171488e-05, "loss": 1.2087, "step": 761 }, { "epoch": 0.5767265846736045, "grad_norm": 2.3364832401275635, "learning_rate": 1.8410325909990207e-05, "loss": 1.251, "step": 762 }, { "epoch": 0.5774834437086093, "grad_norm": 2.348635673522949, "learning_rate": 1.8405980086285693e-05, "loss": 1.2424, "step": 763 }, { "epoch": 0.578240302743614, "grad_norm": 2.472801446914673, "learning_rate": 1.8401628844859193e-05, "loss": 1.1972, "step": 764 }, { "epoch": 0.5789971617786187, "grad_norm": 2.528832197189331, "learning_rate": 1.839727218851545e-05, "loss": 1.2904, "step": 765 }, { "epoch": 0.5797540208136235, "grad_norm": 2.833585262298584, "learning_rate": 1.83929101200627e-05, "loss": 1.2284, "step": 766 }, { "epoch": 0.5805108798486281, "grad_norm": 2.886864185333252, "learning_rate": 1.838854264231267e-05, "loss": 1.2529, "step": 767 }, { "epoch": 0.5812677388836329, "grad_norm": 2.6184258460998535, "learning_rate": 1.8384169758080564e-05, "loss": 1.2422, "step": 768 }, { "epoch": 0.5820245979186377, "grad_norm": 2.59594988822937, "learning_rate": 1.8379791470185077e-05, "loss": 1.2349, "step": 769 }, { "epoch": 0.5827814569536424, "grad_norm": 2.359560489654541, "learning_rate": 1.837540778144839e-05, "loss": 1.2146, "step": 770 }, { "epoch": 0.5835383159886471, "grad_norm": 3.088444709777832, "learning_rate": 1.8371018694696155e-05, "loss": 1.2667, "step": 771 }, { "epoch": 0.5842951750236518, "grad_norm": 2.766091823577881, "learning_rate": 1.836662421275752e-05, "loss": 1.218, "step": 772 }, { "epoch": 0.5850520340586566, "grad_norm": 2.739274263381958, "learning_rate": 1.8362224338465093e-05, "loss": 1.2618, "step": 773 }, { "epoch": 0.5858088930936614, "grad_norm": 4.742860794067383, "learning_rate": 1.835781907465497e-05, "loss": 1.2989, "step": 774 }, { "epoch": 0.586565752128666, "grad_norm": 3.0373001098632812, "learning_rate": 1.8353408424166712e-05, "loss": 1.283, "step": 775 }, { "epoch": 0.5873226111636708, "grad_norm": 2.5657973289489746, "learning_rate": 1.8348992389843365e-05, "loss": 1.1942, "step": 776 }, { "epoch": 0.5880794701986755, "grad_norm": 2.7128591537475586, "learning_rate": 1.834457097453143e-05, "loss": 1.262, "step": 777 }, { "epoch": 0.5888363292336802, "grad_norm": 2.4917023181915283, "learning_rate": 1.834014418108089e-05, "loss": 1.2194, "step": 778 }, { "epoch": 0.589593188268685, "grad_norm": 2.7309277057647705, "learning_rate": 1.8335712012345188e-05, "loss": 1.231, "step": 779 }, { "epoch": 0.5903500473036897, "grad_norm": 2.894216537475586, "learning_rate": 1.8331274471181224e-05, "loss": 1.234, "step": 780 }, { "epoch": 0.5911069063386944, "grad_norm": 2.491863250732422, "learning_rate": 1.8326831560449375e-05, "loss": 1.2335, "step": 781 }, { "epoch": 0.5918637653736991, "grad_norm": 2.636247396469116, "learning_rate": 1.832238328301348e-05, "loss": 1.2371, "step": 782 }, { "epoch": 0.5926206244087039, "grad_norm": 2.6232643127441406, "learning_rate": 1.831792964174082e-05, "loss": 1.2626, "step": 783 }, { "epoch": 0.5933774834437087, "grad_norm": 2.667076826095581, "learning_rate": 1.8313470639502148e-05, "loss": 1.257, "step": 784 }, { "epoch": 0.5941343424787133, "grad_norm": 2.936359405517578, "learning_rate": 1.8309006279171675e-05, "loss": 1.2509, "step": 785 }, { "epoch": 0.5948912015137181, "grad_norm": 2.522406578063965, "learning_rate": 1.8304536563627052e-05, "loss": 1.2804, "step": 786 }, { "epoch": 0.5956480605487228, "grad_norm": 2.542407512664795, "learning_rate": 1.830006149574939e-05, "loss": 1.2393, "step": 787 }, { "epoch": 0.5964049195837275, "grad_norm": 2.5919876098632812, "learning_rate": 1.8295581078423253e-05, "loss": 1.2622, "step": 788 }, { "epoch": 0.5971617786187322, "grad_norm": 2.7095932960510254, "learning_rate": 1.8291095314536647e-05, "loss": 1.2491, "step": 789 }, { "epoch": 0.597918637653737, "grad_norm": 2.4110512733459473, "learning_rate": 1.8286604206981028e-05, "loss": 1.2622, "step": 790 }, { "epoch": 0.5986754966887418, "grad_norm": 2.7041079998016357, "learning_rate": 1.8282107758651295e-05, "loss": 1.2563, "step": 791 }, { "epoch": 0.5994323557237464, "grad_norm": 2.7525973320007324, "learning_rate": 1.827760597244579e-05, "loss": 1.2449, "step": 792 }, { "epoch": 0.6001892147587512, "grad_norm": 2.612968921661377, "learning_rate": 1.8273098851266297e-05, "loss": 1.258, "step": 793 }, { "epoch": 0.6009460737937559, "grad_norm": 2.6070921421051025, "learning_rate": 1.826858639801804e-05, "loss": 1.3045, "step": 794 }, { "epoch": 0.6017029328287606, "grad_norm": 2.4890692234039307, "learning_rate": 1.8264068615609668e-05, "loss": 1.2253, "step": 795 }, { "epoch": 0.6024597918637654, "grad_norm": 2.9760918617248535, "learning_rate": 1.8259545506953285e-05, "loss": 1.2673, "step": 796 }, { "epoch": 0.6032166508987701, "grad_norm": 2.8577773571014404, "learning_rate": 1.825501707496441e-05, "loss": 1.264, "step": 797 }, { "epoch": 0.6039735099337749, "grad_norm": 2.549546718597412, "learning_rate": 1.825048332256201e-05, "loss": 1.2228, "step": 798 }, { "epoch": 0.6047303689687795, "grad_norm": 2.7687017917633057, "learning_rate": 1.8245944252668462e-05, "loss": 1.2522, "step": 799 }, { "epoch": 0.6054872280037843, "grad_norm": 2.533287763595581, "learning_rate": 1.824139986820959e-05, "loss": 1.1939, "step": 800 }, { "epoch": 0.6062440870387891, "grad_norm": 2.6402809619903564, "learning_rate": 1.8236850172114633e-05, "loss": 1.2417, "step": 801 }, { "epoch": 0.6070009460737937, "grad_norm": 2.592946767807007, "learning_rate": 1.8232295167316252e-05, "loss": 1.2922, "step": 802 }, { "epoch": 0.6077578051087985, "grad_norm": 2.6012048721313477, "learning_rate": 1.8227734856750537e-05, "loss": 1.2658, "step": 803 }, { "epoch": 0.6085146641438032, "grad_norm": 2.737257242202759, "learning_rate": 1.8223169243356995e-05, "loss": 1.2955, "step": 804 }, { "epoch": 0.609271523178808, "grad_norm": 2.8576440811157227, "learning_rate": 1.8218598330078548e-05, "loss": 1.261, "step": 805 }, { "epoch": 0.6100283822138127, "grad_norm": 4.944385051727295, "learning_rate": 1.8214022119861537e-05, "loss": 1.2438, "step": 806 }, { "epoch": 0.6107852412488174, "grad_norm": 2.8472225666046143, "learning_rate": 1.820944061565572e-05, "loss": 1.2305, "step": 807 }, { "epoch": 0.6115421002838222, "grad_norm": 2.8943638801574707, "learning_rate": 1.8204853820414267e-05, "loss": 1.2608, "step": 808 }, { "epoch": 0.6122989593188268, "grad_norm": 2.523142099380493, "learning_rate": 1.820026173709375e-05, "loss": 1.2721, "step": 809 }, { "epoch": 0.6130558183538316, "grad_norm": 2.8089590072631836, "learning_rate": 1.8195664368654157e-05, "loss": 1.222, "step": 810 }, { "epoch": 0.6138126773888364, "grad_norm": 2.9274590015411377, "learning_rate": 1.8191061718058885e-05, "loss": 1.2534, "step": 811 }, { "epoch": 0.614569536423841, "grad_norm": 2.6819167137145996, "learning_rate": 1.818645378827473e-05, "loss": 1.2566, "step": 812 }, { "epoch": 0.6153263954588458, "grad_norm": 2.5687010288238525, "learning_rate": 1.8181840582271897e-05, "loss": 1.2323, "step": 813 }, { "epoch": 0.6160832544938505, "grad_norm": 2.636622428894043, "learning_rate": 1.8177222103023983e-05, "loss": 1.2007, "step": 814 }, { "epoch": 0.6168401135288553, "grad_norm": 2.5585618019104004, "learning_rate": 1.8172598353507988e-05, "loss": 1.2169, "step": 815 }, { "epoch": 0.61759697256386, "grad_norm": 2.880889415740967, "learning_rate": 1.8167969336704322e-05, "loss": 1.2211, "step": 816 }, { "epoch": 0.6183538315988647, "grad_norm": 2.575530767440796, "learning_rate": 1.8163335055596764e-05, "loss": 1.2165, "step": 817 }, { "epoch": 0.6191106906338695, "grad_norm": 2.65857195854187, "learning_rate": 1.815869551317251e-05, "loss": 1.2527, "step": 818 }, { "epoch": 0.6198675496688741, "grad_norm": 2.7308692932128906, "learning_rate": 1.8154050712422135e-05, "loss": 1.245, "step": 819 }, { "epoch": 0.6206244087038789, "grad_norm": 2.4128143787384033, "learning_rate": 1.8149400656339606e-05, "loss": 1.2274, "step": 820 }, { "epoch": 0.6213812677388836, "grad_norm": 2.678269386291504, "learning_rate": 1.8144745347922282e-05, "loss": 1.2348, "step": 821 }, { "epoch": 0.6221381267738884, "grad_norm": 2.4970011711120605, "learning_rate": 1.81400847901709e-05, "loss": 1.2525, "step": 822 }, { "epoch": 0.6228949858088931, "grad_norm": 3.0284082889556885, "learning_rate": 1.813541898608959e-05, "loss": 1.2283, "step": 823 }, { "epoch": 0.6236518448438978, "grad_norm": 2.5325472354888916, "learning_rate": 1.813074793868585e-05, "loss": 1.2177, "step": 824 }, { "epoch": 0.6244087038789026, "grad_norm": 2.8422694206237793, "learning_rate": 1.8126071650970566e-05, "loss": 1.1957, "step": 825 }, { "epoch": 0.6251655629139072, "grad_norm": 2.7805769443511963, "learning_rate": 1.8121390125958012e-05, "loss": 1.2406, "step": 826 }, { "epoch": 0.625922421948912, "grad_norm": 3.035707473754883, "learning_rate": 1.811670336666582e-05, "loss": 1.2217, "step": 827 }, { "epoch": 0.6266792809839168, "grad_norm": 2.6617417335510254, "learning_rate": 1.8112011376115004e-05, "loss": 1.2489, "step": 828 }, { "epoch": 0.6274361400189215, "grad_norm": 2.593369722366333, "learning_rate": 1.8107314157329953e-05, "loss": 1.2582, "step": 829 }, { "epoch": 0.6281929990539262, "grad_norm": 2.33566951751709, "learning_rate": 1.810261171333842e-05, "loss": 1.1726, "step": 830 }, { "epoch": 0.6289498580889309, "grad_norm": 2.6399929523468018, "learning_rate": 1.8097904047171525e-05, "loss": 1.233, "step": 831 }, { "epoch": 0.6297067171239357, "grad_norm": 2.833388328552246, "learning_rate": 1.8093191161863765e-05, "loss": 1.2465, "step": 832 }, { "epoch": 0.6304635761589404, "grad_norm": 2.5618953704833984, "learning_rate": 1.808847306045299e-05, "loss": 1.2786, "step": 833 }, { "epoch": 0.6312204351939451, "grad_norm": 2.449512004852295, "learning_rate": 1.8083749745980417e-05, "loss": 1.1866, "step": 834 }, { "epoch": 0.6319772942289499, "grad_norm": 2.3261687755584717, "learning_rate": 1.8079021221490623e-05, "loss": 1.2293, "step": 835 }, { "epoch": 0.6327341532639545, "grad_norm": 2.2670247554779053, "learning_rate": 1.8074287490031544e-05, "loss": 1.2307, "step": 836 }, { "epoch": 0.6334910122989593, "grad_norm": 2.9090189933776855, "learning_rate": 1.8069548554654465e-05, "loss": 1.23, "step": 837 }, { "epoch": 0.6342478713339641, "grad_norm": 2.2023513317108154, "learning_rate": 1.8064804418414036e-05, "loss": 1.2559, "step": 838 }, { "epoch": 0.6350047303689688, "grad_norm": 2.3907856941223145, "learning_rate": 1.8060055084368256e-05, "loss": 1.1783, "step": 839 }, { "epoch": 0.6357615894039735, "grad_norm": 2.7036445140838623, "learning_rate": 1.805530055557847e-05, "loss": 1.2268, "step": 840 }, { "epoch": 0.6365184484389782, "grad_norm": 2.429286003112793, "learning_rate": 1.805054083510938e-05, "loss": 1.1904, "step": 841 }, { "epoch": 0.637275307473983, "grad_norm": 2.644791603088379, "learning_rate": 1.804577592602902e-05, "loss": 1.1866, "step": 842 }, { "epoch": 0.6380321665089878, "grad_norm": 2.7880802154541016, "learning_rate": 1.804100583140879e-05, "loss": 1.2817, "step": 843 }, { "epoch": 0.6387890255439924, "grad_norm": 2.485358476638794, "learning_rate": 1.8036230554323413e-05, "loss": 1.281, "step": 844 }, { "epoch": 0.6395458845789972, "grad_norm": 2.5849761962890625, "learning_rate": 1.803145009785096e-05, "loss": 1.248, "step": 845 }, { "epoch": 0.6403027436140019, "grad_norm": 2.357409715652466, "learning_rate": 1.8026664465072838e-05, "loss": 1.2828, "step": 846 }, { "epoch": 0.6410596026490066, "grad_norm": 2.4510414600372314, "learning_rate": 1.80218736590738e-05, "loss": 1.2275, "step": 847 }, { "epoch": 0.6418164616840114, "grad_norm": 2.625035524368286, "learning_rate": 1.8017077682941918e-05, "loss": 1.2369, "step": 848 }, { "epoch": 0.6425733207190161, "grad_norm": 2.4510104656219482, "learning_rate": 1.8012276539768613e-05, "loss": 1.2624, "step": 849 }, { "epoch": 0.6433301797540208, "grad_norm": 2.6468582153320312, "learning_rate": 1.800747023264862e-05, "loss": 1.2964, "step": 850 }, { "epoch": 0.6440870387890255, "grad_norm": 2.45991587638855, "learning_rate": 1.800265876468002e-05, "loss": 1.2359, "step": 851 }, { "epoch": 0.6448438978240303, "grad_norm": 2.546734571456909, "learning_rate": 1.799784213896421e-05, "loss": 1.2124, "step": 852 }, { "epoch": 0.645600756859035, "grad_norm": 2.265397787094116, "learning_rate": 1.799302035860591e-05, "loss": 1.1945, "step": 853 }, { "epoch": 0.6463576158940397, "grad_norm": 2.4162395000457764, "learning_rate": 1.7988193426713165e-05, "loss": 1.2115, "step": 854 }, { "epoch": 0.6471144749290445, "grad_norm": 2.2301483154296875, "learning_rate": 1.7983361346397347e-05, "loss": 1.2699, "step": 855 }, { "epoch": 0.6478713339640492, "grad_norm": 2.2673699855804443, "learning_rate": 1.797852412077314e-05, "loss": 1.2525, "step": 856 }, { "epoch": 0.6486281929990539, "grad_norm": 2.5041098594665527, "learning_rate": 1.7973681752958543e-05, "loss": 1.231, "step": 857 }, { "epoch": 0.6493850520340586, "grad_norm": 2.5438284873962402, "learning_rate": 1.7968834246074875e-05, "loss": 1.2316, "step": 858 }, { "epoch": 0.6501419110690634, "grad_norm": 2.4436419010162354, "learning_rate": 1.7963981603246762e-05, "loss": 1.2461, "step": 859 }, { "epoch": 0.6508987701040682, "grad_norm": 2.3260018825531006, "learning_rate": 1.795912382760215e-05, "loss": 1.2575, "step": 860 }, { "epoch": 0.6516556291390728, "grad_norm": 2.527569532394409, "learning_rate": 1.7954260922272278e-05, "loss": 1.2552, "step": 861 }, { "epoch": 0.6524124881740776, "grad_norm": 2.5068411827087402, "learning_rate": 1.7949392890391706e-05, "loss": 1.2439, "step": 862 }, { "epoch": 0.6531693472090823, "grad_norm": 2.8131117820739746, "learning_rate": 1.7944519735098295e-05, "loss": 1.2669, "step": 863 }, { "epoch": 0.653926206244087, "grad_norm": 2.368083953857422, "learning_rate": 1.79396414595332e-05, "loss": 1.273, "step": 864 }, { "epoch": 0.6546830652790918, "grad_norm": 2.4757819175720215, "learning_rate": 1.7934758066840893e-05, "loss": 1.2652, "step": 865 }, { "epoch": 0.6554399243140965, "grad_norm": 2.7727437019348145, "learning_rate": 1.7929869560169123e-05, "loss": 1.2661, "step": 866 }, { "epoch": 0.6561967833491013, "grad_norm": 2.5417017936706543, "learning_rate": 1.7924975942668954e-05, "loss": 1.2624, "step": 867 }, { "epoch": 0.6569536423841059, "grad_norm": 3.0404696464538574, "learning_rate": 1.792007721749474e-05, "loss": 1.2149, "step": 868 }, { "epoch": 0.6577105014191107, "grad_norm": 2.528648853302002, "learning_rate": 1.7915173387804115e-05, "loss": 1.2536, "step": 869 }, { "epoch": 0.6584673604541155, "grad_norm": 2.5994584560394287, "learning_rate": 1.791026445675802e-05, "loss": 1.2146, "step": 870 }, { "epoch": 0.6592242194891201, "grad_norm": 2.523890495300293, "learning_rate": 1.7905350427520672e-05, "loss": 1.2599, "step": 871 }, { "epoch": 0.6599810785241249, "grad_norm": 3.055417537689209, "learning_rate": 1.7900431303259585e-05, "loss": 1.2447, "step": 872 }, { "epoch": 0.6607379375591296, "grad_norm": 2.5144965648651123, "learning_rate": 1.789550708714555e-05, "loss": 1.2022, "step": 873 }, { "epoch": 0.6614947965941343, "grad_norm": 2.5344860553741455, "learning_rate": 1.789057778235264e-05, "loss": 1.2578, "step": 874 }, { "epoch": 0.6622516556291391, "grad_norm": 2.7370986938476562, "learning_rate": 1.7885643392058207e-05, "loss": 1.1964, "step": 875 }, { "epoch": 0.6630085146641438, "grad_norm": 2.831005573272705, "learning_rate": 1.7880703919442885e-05, "loss": 1.2218, "step": 876 }, { "epoch": 0.6637653736991486, "grad_norm": 2.2660505771636963, "learning_rate": 1.787575936769059e-05, "loss": 1.232, "step": 877 }, { "epoch": 0.6645222327341532, "grad_norm": 2.5577943325042725, "learning_rate": 1.78708097399885e-05, "loss": 1.289, "step": 878 }, { "epoch": 0.665279091769158, "grad_norm": 2.8066608905792236, "learning_rate": 1.786585503952707e-05, "loss": 1.2051, "step": 879 }, { "epoch": 0.6660359508041628, "grad_norm": 2.683680295944214, "learning_rate": 1.786089526950002e-05, "loss": 1.2343, "step": 880 }, { "epoch": 0.6667928098391674, "grad_norm": 2.571253538131714, "learning_rate": 1.785593043310434e-05, "loss": 1.2279, "step": 881 }, { "epoch": 0.6675496688741722, "grad_norm": 2.2818214893341064, "learning_rate": 1.78509605335403e-05, "loss": 1.2346, "step": 882 }, { "epoch": 0.6683065279091769, "grad_norm": 2.427520513534546, "learning_rate": 1.7845985574011413e-05, "loss": 1.251, "step": 883 }, { "epoch": 0.6690633869441817, "grad_norm": 2.615901231765747, "learning_rate": 1.784100555772446e-05, "loss": 1.2697, "step": 884 }, { "epoch": 0.6698202459791863, "grad_norm": 2.3778128623962402, "learning_rate": 1.7836020487889495e-05, "loss": 1.2291, "step": 885 }, { "epoch": 0.6705771050141911, "grad_norm": 2.4669504165649414, "learning_rate": 1.7831030367719802e-05, "loss": 1.2365, "step": 886 }, { "epoch": 0.6713339640491959, "grad_norm": 2.397721290588379, "learning_rate": 1.782603520043195e-05, "loss": 1.2718, "step": 887 }, { "epoch": 0.6720908230842005, "grad_norm": 2.323598623275757, "learning_rate": 1.782103498924574e-05, "loss": 1.2706, "step": 888 }, { "epoch": 0.6728476821192053, "grad_norm": 2.592615842819214, "learning_rate": 1.7816029737384234e-05, "loss": 1.2821, "step": 889 }, { "epoch": 0.67360454115421, "grad_norm": 2.552388906478882, "learning_rate": 1.7811019448073742e-05, "loss": 1.2075, "step": 890 }, { "epoch": 0.6743614001892148, "grad_norm": 2.659424304962158, "learning_rate": 1.7806004124543818e-05, "loss": 1.2365, "step": 891 }, { "epoch": 0.6751182592242195, "grad_norm": 2.596625328063965, "learning_rate": 1.7800983770027266e-05, "loss": 1.2685, "step": 892 }, { "epoch": 0.6758751182592242, "grad_norm": 2.485259771347046, "learning_rate": 1.779595838776013e-05, "loss": 1.2453, "step": 893 }, { "epoch": 0.676631977294229, "grad_norm": 2.3858642578125, "learning_rate": 1.7790927980981687e-05, "loss": 1.1896, "step": 894 }, { "epoch": 0.6773888363292336, "grad_norm": 2.53601336479187, "learning_rate": 1.7785892552934468e-05, "loss": 1.2533, "step": 895 }, { "epoch": 0.6781456953642384, "grad_norm": 2.7505080699920654, "learning_rate": 1.778085210686423e-05, "loss": 1.2449, "step": 896 }, { "epoch": 0.6789025543992432, "grad_norm": 2.4080655574798584, "learning_rate": 1.7775806646019974e-05, "loss": 1.1985, "step": 897 }, { "epoch": 0.6796594134342478, "grad_norm": 2.742640972137451, "learning_rate": 1.7770756173653923e-05, "loss": 1.2434, "step": 898 }, { "epoch": 0.6804162724692526, "grad_norm": 2.377990484237671, "learning_rate": 1.776570069302153e-05, "loss": 1.1726, "step": 899 }, { "epoch": 0.6811731315042573, "grad_norm": 2.35687518119812, "learning_rate": 1.7760640207381486e-05, "loss": 1.2189, "step": 900 }, { "epoch": 0.6819299905392621, "grad_norm": 2.576018810272217, "learning_rate": 1.77555747199957e-05, "loss": 1.2318, "step": 901 }, { "epoch": 0.6826868495742668, "grad_norm": 2.3314318656921387, "learning_rate": 1.7750504234129312e-05, "loss": 1.1889, "step": 902 }, { "epoch": 0.6834437086092715, "grad_norm": 2.3357717990875244, "learning_rate": 1.7745428753050675e-05, "loss": 1.2168, "step": 903 }, { "epoch": 0.6842005676442763, "grad_norm": 2.2540555000305176, "learning_rate": 1.774034828003137e-05, "loss": 1.2017, "step": 904 }, { "epoch": 0.684957426679281, "grad_norm": 2.325144052505493, "learning_rate": 1.773526281834619e-05, "loss": 1.2409, "step": 905 }, { "epoch": 0.6857142857142857, "grad_norm": 2.731501340866089, "learning_rate": 1.7730172371273147e-05, "loss": 1.2765, "step": 906 }, { "epoch": 0.6864711447492905, "grad_norm": 2.3535265922546387, "learning_rate": 1.7725076942093468e-05, "loss": 1.2353, "step": 907 }, { "epoch": 0.6872280037842952, "grad_norm": 2.852663040161133, "learning_rate": 1.7719976534091584e-05, "loss": 1.2761, "step": 908 }, { "epoch": 0.6879848628192999, "grad_norm": 2.5675928592681885, "learning_rate": 1.7714871150555146e-05, "loss": 1.1906, "step": 909 }, { "epoch": 0.6887417218543046, "grad_norm": 2.1720049381256104, "learning_rate": 1.7709760794775e-05, "loss": 1.2057, "step": 910 }, { "epoch": 0.6894985808893094, "grad_norm": 2.567373514175415, "learning_rate": 1.7704645470045213e-05, "loss": 1.2365, "step": 911 }, { "epoch": 0.6902554399243142, "grad_norm": 2.38577938079834, "learning_rate": 1.7699525179663034e-05, "loss": 1.2047, "step": 912 }, { "epoch": 0.6910122989593188, "grad_norm": 2.3595142364501953, "learning_rate": 1.7694399926928932e-05, "loss": 1.2329, "step": 913 }, { "epoch": 0.6917691579943236, "grad_norm": 2.7524566650390625, "learning_rate": 1.7689269715146562e-05, "loss": 1.2461, "step": 914 }, { "epoch": 0.6925260170293283, "grad_norm": 2.2120566368103027, "learning_rate": 1.768413454762278e-05, "loss": 1.2232, "step": 915 }, { "epoch": 0.693282876064333, "grad_norm": 2.491506338119507, "learning_rate": 1.767899442766764e-05, "loss": 1.2185, "step": 916 }, { "epoch": 0.6940397350993377, "grad_norm": 2.299386501312256, "learning_rate": 1.7673849358594387e-05, "loss": 1.2146, "step": 917 }, { "epoch": 0.6947965941343425, "grad_norm": 2.367396831512451, "learning_rate": 1.766869934371945e-05, "loss": 1.2666, "step": 918 }, { "epoch": 0.6955534531693472, "grad_norm": 2.379352331161499, "learning_rate": 1.766354438636245e-05, "loss": 1.2295, "step": 919 }, { "epoch": 0.6963103122043519, "grad_norm": 2.91322660446167, "learning_rate": 1.7658384489846197e-05, "loss": 1.2211, "step": 920 }, { "epoch": 0.6970671712393567, "grad_norm": 2.3727736473083496, "learning_rate": 1.7653219657496675e-05, "loss": 1.2478, "step": 921 }, { "epoch": 0.6978240302743614, "grad_norm": 2.3029327392578125, "learning_rate": 1.7648049892643064e-05, "loss": 1.238, "step": 922 }, { "epoch": 0.6985808893093661, "grad_norm": 2.3356475830078125, "learning_rate": 1.7642875198617715e-05, "loss": 1.1932, "step": 923 }, { "epoch": 0.6993377483443709, "grad_norm": 2.5331709384918213, "learning_rate": 1.7637695578756148e-05, "loss": 1.1822, "step": 924 }, { "epoch": 0.7000946073793756, "grad_norm": 2.714674949645996, "learning_rate": 1.7632511036397078e-05, "loss": 1.2454, "step": 925 }, { "epoch": 0.7008514664143803, "grad_norm": 2.497758388519287, "learning_rate": 1.7627321574882373e-05, "loss": 1.2552, "step": 926 }, { "epoch": 0.701608325449385, "grad_norm": 2.6237785816192627, "learning_rate": 1.7622127197557085e-05, "loss": 1.2334, "step": 927 }, { "epoch": 0.7023651844843898, "grad_norm": 2.4308512210845947, "learning_rate": 1.7616927907769436e-05, "loss": 1.2516, "step": 928 }, { "epoch": 0.7031220435193946, "grad_norm": 2.1913723945617676, "learning_rate": 1.7611723708870797e-05, "loss": 1.267, "step": 929 }, { "epoch": 0.7038789025543992, "grad_norm": 2.6569485664367676, "learning_rate": 1.7606514604215723e-05, "loss": 1.2301, "step": 930 }, { "epoch": 0.704635761589404, "grad_norm": 2.4195547103881836, "learning_rate": 1.7601300597161918e-05, "loss": 1.2464, "step": 931 }, { "epoch": 0.7053926206244087, "grad_norm": 2.580186128616333, "learning_rate": 1.7596081691070262e-05, "loss": 1.2432, "step": 932 }, { "epoch": 0.7061494796594134, "grad_norm": 2.4679551124572754, "learning_rate": 1.759085788930477e-05, "loss": 1.23, "step": 933 }, { "epoch": 0.7069063386944182, "grad_norm": 2.256150722503662, "learning_rate": 1.7585629195232633e-05, "loss": 1.2362, "step": 934 }, { "epoch": 0.7076631977294229, "grad_norm": 2.7825927734375, "learning_rate": 1.7580395612224184e-05, "loss": 1.2542, "step": 935 }, { "epoch": 0.7084200567644277, "grad_norm": 2.568265676498413, "learning_rate": 1.757515714365291e-05, "loss": 1.3004, "step": 936 }, { "epoch": 0.7091769157994323, "grad_norm": 2.422884702682495, "learning_rate": 1.7569913792895455e-05, "loss": 1.2382, "step": 937 }, { "epoch": 0.7099337748344371, "grad_norm": 2.4318430423736572, "learning_rate": 1.7564665563331597e-05, "loss": 1.2421, "step": 938 }, { "epoch": 0.7106906338694419, "grad_norm": 2.4188950061798096, "learning_rate": 1.755941245834426e-05, "loss": 1.244, "step": 939 }, { "epoch": 0.7114474929044465, "grad_norm": 2.37963604927063, "learning_rate": 1.7554154481319523e-05, "loss": 1.2137, "step": 940 }, { "epoch": 0.7122043519394513, "grad_norm": 2.5548665523529053, "learning_rate": 1.7548891635646595e-05, "loss": 1.241, "step": 941 }, { "epoch": 0.712961210974456, "grad_norm": 2.384345054626465, "learning_rate": 1.7543623924717827e-05, "loss": 1.2299, "step": 942 }, { "epoch": 0.7137180700094607, "grad_norm": 2.459399461746216, "learning_rate": 1.7538351351928705e-05, "loss": 1.2242, "step": 943 }, { "epoch": 0.7144749290444655, "grad_norm": 2.2715373039245605, "learning_rate": 1.7533073920677847e-05, "loss": 1.2264, "step": 944 }, { "epoch": 0.7152317880794702, "grad_norm": 2.423783302307129, "learning_rate": 1.752779163436701e-05, "loss": 1.2061, "step": 945 }, { "epoch": 0.715988647114475, "grad_norm": 2.368046283721924, "learning_rate": 1.7522504496401068e-05, "loss": 1.2568, "step": 946 }, { "epoch": 0.7167455061494796, "grad_norm": 2.7439255714416504, "learning_rate": 1.7517212510188034e-05, "loss": 1.2123, "step": 947 }, { "epoch": 0.7175023651844844, "grad_norm": 2.3615167140960693, "learning_rate": 1.751191567913904e-05, "loss": 1.2188, "step": 948 }, { "epoch": 0.7182592242194891, "grad_norm": 2.394190549850464, "learning_rate": 1.7506614006668346e-05, "loss": 1.21, "step": 949 }, { "epoch": 0.7190160832544938, "grad_norm": 2.2254350185394287, "learning_rate": 1.7501307496193324e-05, "loss": 1.2306, "step": 950 }, { "epoch": 0.7197729422894986, "grad_norm": 2.734381675720215, "learning_rate": 1.749599615113447e-05, "loss": 1.2367, "step": 951 }, { "epoch": 0.7205298013245033, "grad_norm": 2.4236867427825928, "learning_rate": 1.7490679974915404e-05, "loss": 1.2013, "step": 952 }, { "epoch": 0.7212866603595081, "grad_norm": 2.4105286598205566, "learning_rate": 1.748535897096284e-05, "loss": 1.1849, "step": 953 }, { "epoch": 0.7220435193945127, "grad_norm": 3.396277666091919, "learning_rate": 1.7480033142706626e-05, "loss": 1.2018, "step": 954 }, { "epoch": 0.7228003784295175, "grad_norm": 2.49308180809021, "learning_rate": 1.7474702493579704e-05, "loss": 1.2533, "step": 955 }, { "epoch": 0.7235572374645223, "grad_norm": 2.2357521057128906, "learning_rate": 1.7469367027018134e-05, "loss": 1.253, "step": 956 }, { "epoch": 0.7243140964995269, "grad_norm": 2.3083794116973877, "learning_rate": 1.746402674646107e-05, "loss": 1.2062, "step": 957 }, { "epoch": 0.7250709555345317, "grad_norm": 2.087985038757324, "learning_rate": 1.745868165535078e-05, "loss": 1.2146, "step": 958 }, { "epoch": 0.7258278145695364, "grad_norm": 2.1703999042510986, "learning_rate": 1.7453331757132627e-05, "loss": 1.2593, "step": 959 }, { "epoch": 0.7265846736045412, "grad_norm": 2.644440174102783, "learning_rate": 1.7447977055255076e-05, "loss": 1.2744, "step": 960 }, { "epoch": 0.7273415326395459, "grad_norm": 2.2902777194976807, "learning_rate": 1.744261755316968e-05, "loss": 1.2336, "step": 961 }, { "epoch": 0.7280983916745506, "grad_norm": 2.1898083686828613, "learning_rate": 1.7437253254331103e-05, "loss": 1.1872, "step": 962 }, { "epoch": 0.7288552507095554, "grad_norm": 2.192096710205078, "learning_rate": 1.7431884162197076e-05, "loss": 1.1904, "step": 963 }, { "epoch": 0.72961210974456, "grad_norm": 2.344484806060791, "learning_rate": 1.7426510280228447e-05, "loss": 1.2086, "step": 964 }, { "epoch": 0.7303689687795648, "grad_norm": 2.4366836547851562, "learning_rate": 1.742113161188913e-05, "loss": 1.2367, "step": 965 }, { "epoch": 0.7311258278145696, "grad_norm": 2.3846473693847656, "learning_rate": 1.7415748160646136e-05, "loss": 1.2182, "step": 966 }, { "epoch": 0.7318826868495742, "grad_norm": 2.432124614715576, "learning_rate": 1.7410359929969555e-05, "loss": 1.2345, "step": 967 }, { "epoch": 0.732639545884579, "grad_norm": 2.427494525909424, "learning_rate": 1.7404966923332558e-05, "loss": 1.2284, "step": 968 }, { "epoch": 0.7333964049195837, "grad_norm": 2.3191261291503906, "learning_rate": 1.73995691442114e-05, "loss": 1.2092, "step": 969 }, { "epoch": 0.7341532639545885, "grad_norm": 2.3739922046661377, "learning_rate": 1.7394166596085393e-05, "loss": 1.2276, "step": 970 }, { "epoch": 0.7349101229895932, "grad_norm": 2.29589581489563, "learning_rate": 1.7388759282436953e-05, "loss": 1.2422, "step": 971 }, { "epoch": 0.7356669820245979, "grad_norm": 2.3834304809570312, "learning_rate": 1.7383347206751542e-05, "loss": 1.252, "step": 972 }, { "epoch": 0.7364238410596027, "grad_norm": 2.2572319507598877, "learning_rate": 1.7377930372517705e-05, "loss": 1.2296, "step": 973 }, { "epoch": 0.7371807000946073, "grad_norm": 2.6052353382110596, "learning_rate": 1.7372508783227052e-05, "loss": 1.2131, "step": 974 }, { "epoch": 0.7379375591296121, "grad_norm": 2.4882845878601074, "learning_rate": 1.7367082442374255e-05, "loss": 1.2259, "step": 975 }, { "epoch": 0.7386944181646169, "grad_norm": 2.301111936569214, "learning_rate": 1.7361651353457053e-05, "loss": 1.191, "step": 976 }, { "epoch": 0.7394512771996216, "grad_norm": 2.496601104736328, "learning_rate": 1.7356215519976236e-05, "loss": 1.1749, "step": 977 }, { "epoch": 0.7402081362346263, "grad_norm": 2.4782116413116455, "learning_rate": 1.7350774945435667e-05, "loss": 1.2282, "step": 978 }, { "epoch": 0.740964995269631, "grad_norm": 2.3096814155578613, "learning_rate": 1.7345329633342253e-05, "loss": 1.1811, "step": 979 }, { "epoch": 0.7417218543046358, "grad_norm": 2.5999755859375, "learning_rate": 1.7339879587205966e-05, "loss": 1.2081, "step": 980 }, { "epoch": 0.7424787133396404, "grad_norm": 2.3727262020111084, "learning_rate": 1.733442481053981e-05, "loss": 1.2392, "step": 981 }, { "epoch": 0.7432355723746452, "grad_norm": 2.621267318725586, "learning_rate": 1.7328965306859864e-05, "loss": 1.1715, "step": 982 }, { "epoch": 0.74399243140965, "grad_norm": 2.786910057067871, "learning_rate": 1.732350107968523e-05, "loss": 1.2307, "step": 983 }, { "epoch": 0.7447492904446547, "grad_norm": 2.5757007598876953, "learning_rate": 1.7318032132538078e-05, "loss": 1.204, "step": 984 }, { "epoch": 0.7455061494796594, "grad_norm": 2.4591543674468994, "learning_rate": 1.7312558468943595e-05, "loss": 1.1665, "step": 985 }, { "epoch": 0.7462630085146641, "grad_norm": 2.4593307971954346, "learning_rate": 1.730708009243003e-05, "loss": 1.2571, "step": 986 }, { "epoch": 0.7470198675496689, "grad_norm": 2.507080554962158, "learning_rate": 1.7301597006528654e-05, "loss": 1.2222, "step": 987 }, { "epoch": 0.7477767265846736, "grad_norm": 2.445662498474121, "learning_rate": 1.7296109214773782e-05, "loss": 1.2066, "step": 988 }, { "epoch": 0.7485335856196783, "grad_norm": 2.341787099838257, "learning_rate": 1.7290616720702768e-05, "loss": 1.2395, "step": 989 }, { "epoch": 0.7492904446546831, "grad_norm": 2.569960832595825, "learning_rate": 1.728511952785598e-05, "loss": 1.241, "step": 990 }, { "epoch": 0.7500473036896877, "grad_norm": 2.5241215229034424, "learning_rate": 1.7279617639776836e-05, "loss": 1.2231, "step": 991 }, { "epoch": 0.7508041627246925, "grad_norm": 2.4361581802368164, "learning_rate": 1.727411106001176e-05, "loss": 1.2381, "step": 992 }, { "epoch": 0.7515610217596973, "grad_norm": 2.3338370323181152, "learning_rate": 1.7268599792110213e-05, "loss": 1.2526, "step": 993 }, { "epoch": 0.752317880794702, "grad_norm": 2.398029327392578, "learning_rate": 1.726308383962467e-05, "loss": 1.2405, "step": 994 }, { "epoch": 0.7530747398297067, "grad_norm": 2.437852382659912, "learning_rate": 1.7257563206110636e-05, "loss": 1.2553, "step": 995 }, { "epoch": 0.7538315988647114, "grad_norm": 2.763335704803467, "learning_rate": 1.7252037895126622e-05, "loss": 1.2342, "step": 996 }, { "epoch": 0.7545884578997162, "grad_norm": 2.5191261768341064, "learning_rate": 1.7246507910234162e-05, "loss": 1.2188, "step": 997 }, { "epoch": 0.755345316934721, "grad_norm": 2.5067646503448486, "learning_rate": 1.72409732549978e-05, "loss": 1.1707, "step": 998 }, { "epoch": 0.7561021759697256, "grad_norm": 2.400637626647949, "learning_rate": 1.7235433932985092e-05, "loss": 1.1599, "step": 999 }, { "epoch": 0.7568590350047304, "grad_norm": 2.410027027130127, "learning_rate": 1.7229889947766597e-05, "loss": 1.2442, "step": 1000 }, { "epoch": 0.7576158940397351, "grad_norm": 2.3706107139587402, "learning_rate": 1.7224341302915885e-05, "loss": 1.2264, "step": 1001 }, { "epoch": 0.7583727530747398, "grad_norm": 2.4948031902313232, "learning_rate": 1.7218788002009527e-05, "loss": 1.2505, "step": 1002 }, { "epoch": 0.7591296121097446, "grad_norm": 2.4337100982666016, "learning_rate": 1.7213230048627093e-05, "loss": 1.2416, "step": 1003 }, { "epoch": 0.7598864711447493, "grad_norm": 2.2913546562194824, "learning_rate": 1.7207667446351165e-05, "loss": 1.22, "step": 1004 }, { "epoch": 0.760643330179754, "grad_norm": 2.4365074634552, "learning_rate": 1.72021001987673e-05, "loss": 1.2398, "step": 1005 }, { "epoch": 0.7614001892147587, "grad_norm": 2.4662392139434814, "learning_rate": 1.7196528309464067e-05, "loss": 1.2372, "step": 1006 }, { "epoch": 0.7621570482497635, "grad_norm": 2.5085933208465576, "learning_rate": 1.719095178203302e-05, "loss": 1.2408, "step": 1007 }, { "epoch": 0.7629139072847683, "grad_norm": 2.447695016860962, "learning_rate": 1.7185370620068705e-05, "loss": 1.2062, "step": 1008 }, { "epoch": 0.7636707663197729, "grad_norm": 2.8076727390289307, "learning_rate": 1.717978482716865e-05, "loss": 1.2051, "step": 1009 }, { "epoch": 0.7644276253547777, "grad_norm": 2.6247246265411377, "learning_rate": 1.7174194406933377e-05, "loss": 1.1861, "step": 1010 }, { "epoch": 0.7651844843897824, "grad_norm": 2.6273937225341797, "learning_rate": 1.7168599362966382e-05, "loss": 1.1919, "step": 1011 }, { "epoch": 0.7659413434247871, "grad_norm": 2.363234281539917, "learning_rate": 1.7162999698874144e-05, "loss": 1.203, "step": 1012 }, { "epoch": 0.7666982024597918, "grad_norm": 2.4418020248413086, "learning_rate": 1.7157395418266125e-05, "loss": 1.2146, "step": 1013 }, { "epoch": 0.7674550614947966, "grad_norm": 2.4737863540649414, "learning_rate": 1.7151786524754755e-05, "loss": 1.2149, "step": 1014 }, { "epoch": 0.7682119205298014, "grad_norm": 2.2613844871520996, "learning_rate": 1.7146173021955444e-05, "loss": 1.2276, "step": 1015 }, { "epoch": 0.768968779564806, "grad_norm": 2.626579523086548, "learning_rate": 1.714055491348657e-05, "loss": 1.2384, "step": 1016 }, { "epoch": 0.7697256385998108, "grad_norm": 2.406792163848877, "learning_rate": 1.7134932202969482e-05, "loss": 1.2285, "step": 1017 }, { "epoch": 0.7704824976348155, "grad_norm": 2.456866979598999, "learning_rate": 1.7129304894028483e-05, "loss": 1.1853, "step": 1018 }, { "epoch": 0.7712393566698202, "grad_norm": 2.5044846534729004, "learning_rate": 1.7123672990290864e-05, "loss": 1.212, "step": 1019 }, { "epoch": 0.771996215704825, "grad_norm": 2.1986587047576904, "learning_rate": 1.7118036495386856e-05, "loss": 1.2106, "step": 1020 }, { "epoch": 0.7727530747398297, "grad_norm": 2.4531362056732178, "learning_rate": 1.7112395412949662e-05, "loss": 1.2466, "step": 1021 }, { "epoch": 0.7735099337748345, "grad_norm": 2.2251899242401123, "learning_rate": 1.7106749746615437e-05, "loss": 1.1857, "step": 1022 }, { "epoch": 0.7742667928098391, "grad_norm": 2.2850799560546875, "learning_rate": 1.7101099500023287e-05, "loss": 1.2499, "step": 1023 }, { "epoch": 0.7750236518448439, "grad_norm": 2.3555169105529785, "learning_rate": 1.709544467681528e-05, "loss": 1.2139, "step": 1024 }, { "epoch": 0.7757805108798487, "grad_norm": 2.54640531539917, "learning_rate": 1.7089785280636428e-05, "loss": 1.2121, "step": 1025 }, { "epoch": 0.7765373699148533, "grad_norm": 2.3403546810150146, "learning_rate": 1.708412131513469e-05, "loss": 1.2294, "step": 1026 }, { "epoch": 0.7772942289498581, "grad_norm": 2.450343370437622, "learning_rate": 1.707845278396097e-05, "loss": 1.2224, "step": 1027 }, { "epoch": 0.7780510879848628, "grad_norm": 2.4089951515197754, "learning_rate": 1.707277969076912e-05, "loss": 1.2295, "step": 1028 }, { "epoch": 0.7788079470198676, "grad_norm": 2.244898796081543, "learning_rate": 1.7067102039215928e-05, "loss": 1.2656, "step": 1029 }, { "epoch": 0.7795648060548723, "grad_norm": 2.2754669189453125, "learning_rate": 1.7061419832961122e-05, "loss": 1.2106, "step": 1030 }, { "epoch": 0.780321665089877, "grad_norm": 2.3827311992645264, "learning_rate": 1.7055733075667368e-05, "loss": 1.1916, "step": 1031 }, { "epoch": 0.7810785241248818, "grad_norm": 3.2731504440307617, "learning_rate": 1.7050041771000258e-05, "loss": 1.2265, "step": 1032 }, { "epoch": 0.7818353831598864, "grad_norm": 2.48207950592041, "learning_rate": 1.7044345922628326e-05, "loss": 1.2305, "step": 1033 }, { "epoch": 0.7825922421948912, "grad_norm": 2.3561174869537354, "learning_rate": 1.703864553422302e-05, "loss": 1.2191, "step": 1034 }, { "epoch": 0.783349101229896, "grad_norm": 2.4696364402770996, "learning_rate": 1.703294060945873e-05, "loss": 1.2354, "step": 1035 }, { "epoch": 0.7841059602649006, "grad_norm": 2.214374542236328, "learning_rate": 1.7027231152012765e-05, "loss": 1.1459, "step": 1036 }, { "epoch": 0.7848628192999054, "grad_norm": 2.8190994262695312, "learning_rate": 1.7021517165565352e-05, "loss": 1.2289, "step": 1037 }, { "epoch": 0.7856196783349101, "grad_norm": 2.3381307125091553, "learning_rate": 1.701579865379964e-05, "loss": 1.2142, "step": 1038 }, { "epoch": 0.7863765373699149, "grad_norm": 2.4270827770233154, "learning_rate": 1.7010075620401693e-05, "loss": 1.175, "step": 1039 }, { "epoch": 0.7871333964049196, "grad_norm": 2.5499768257141113, "learning_rate": 1.7004348069060487e-05, "loss": 1.1907, "step": 1040 }, { "epoch": 0.7878902554399243, "grad_norm": 2.8665435314178467, "learning_rate": 1.6998616003467923e-05, "loss": 1.232, "step": 1041 }, { "epoch": 0.7886471144749291, "grad_norm": 2.443026065826416, "learning_rate": 1.6992879427318798e-05, "loss": 1.206, "step": 1042 }, { "epoch": 0.7894039735099337, "grad_norm": 2.408712148666382, "learning_rate": 1.6987138344310822e-05, "loss": 1.1984, "step": 1043 }, { "epoch": 0.7901608325449385, "grad_norm": 2.5489931106567383, "learning_rate": 1.6981392758144616e-05, "loss": 1.27, "step": 1044 }, { "epoch": 0.7909176915799432, "grad_norm": 2.2722368240356445, "learning_rate": 1.6975642672523684e-05, "loss": 1.2572, "step": 1045 }, { "epoch": 0.791674550614948, "grad_norm": 2.921919822692871, "learning_rate": 1.6969888091154452e-05, "loss": 1.2433, "step": 1046 }, { "epoch": 0.7924314096499527, "grad_norm": 2.41582989692688, "learning_rate": 1.6964129017746236e-05, "loss": 1.2537, "step": 1047 }, { "epoch": 0.7931882686849574, "grad_norm": 2.557302474975586, "learning_rate": 1.695836545601125e-05, "loss": 1.2248, "step": 1048 }, { "epoch": 0.7939451277199622, "grad_norm": 2.4108498096466064, "learning_rate": 1.6952597409664587e-05, "loss": 1.2337, "step": 1049 }, { "epoch": 0.7947019867549668, "grad_norm": 2.3067305088043213, "learning_rate": 1.694682488242425e-05, "loss": 1.1974, "step": 1050 }, { "epoch": 0.7954588457899716, "grad_norm": 2.3873379230499268, "learning_rate": 1.6941047878011122e-05, "loss": 1.204, "step": 1051 }, { "epoch": 0.7962157048249764, "grad_norm": 2.3166935443878174, "learning_rate": 1.6935266400148963e-05, "loss": 1.1585, "step": 1052 }, { "epoch": 0.796972563859981, "grad_norm": 2.312579870223999, "learning_rate": 1.6929480452564438e-05, "loss": 1.2178, "step": 1053 }, { "epoch": 0.7977294228949858, "grad_norm": 2.3897957801818848, "learning_rate": 1.6923690038987075e-05, "loss": 1.1867, "step": 1054 }, { "epoch": 0.7984862819299905, "grad_norm": 2.5109200477600098, "learning_rate": 1.6917895163149282e-05, "loss": 1.2219, "step": 1055 }, { "epoch": 0.7992431409649953, "grad_norm": 2.4277844429016113, "learning_rate": 1.6912095828786353e-05, "loss": 1.2224, "step": 1056 }, { "epoch": 0.8, "grad_norm": 2.285210609436035, "learning_rate": 1.6906292039636452e-05, "loss": 1.233, "step": 1057 }, { "epoch": 0.8007568590350047, "grad_norm": 2.475517511367798, "learning_rate": 1.690048379944061e-05, "loss": 1.2606, "step": 1058 }, { "epoch": 0.8015137180700095, "grad_norm": 2.2740111351013184, "learning_rate": 1.6894671111942733e-05, "loss": 1.1516, "step": 1059 }, { "epoch": 0.8022705771050141, "grad_norm": 2.777266263961792, "learning_rate": 1.6888853980889583e-05, "loss": 1.2257, "step": 1060 }, { "epoch": 0.8030274361400189, "grad_norm": 2.4774162769317627, "learning_rate": 1.6883032410030796e-05, "loss": 1.2161, "step": 1061 }, { "epoch": 0.8037842951750237, "grad_norm": 2.4283878803253174, "learning_rate": 1.6877206403118875e-05, "loss": 1.2258, "step": 1062 }, { "epoch": 0.8045411542100284, "grad_norm": 2.2770519256591797, "learning_rate": 1.687137596390917e-05, "loss": 1.2209, "step": 1063 }, { "epoch": 0.8052980132450331, "grad_norm": 2.4034667015075684, "learning_rate": 1.6865541096159895e-05, "loss": 1.1773, "step": 1064 }, { "epoch": 0.8060548722800378, "grad_norm": 2.2633402347564697, "learning_rate": 1.6859701803632117e-05, "loss": 1.2552, "step": 1065 }, { "epoch": 0.8068117313150426, "grad_norm": 2.3959109783172607, "learning_rate": 1.6853858090089753e-05, "loss": 1.27, "step": 1066 }, { "epoch": 0.8075685903500474, "grad_norm": 2.4210898876190186, "learning_rate": 1.6848009959299575e-05, "loss": 1.2173, "step": 1067 }, { "epoch": 0.808325449385052, "grad_norm": 2.3308327198028564, "learning_rate": 1.6842157415031194e-05, "loss": 1.2738, "step": 1068 }, { "epoch": 0.8090823084200568, "grad_norm": 3.167160987854004, "learning_rate": 1.683630046105707e-05, "loss": 1.2349, "step": 1069 }, { "epoch": 0.8098391674550615, "grad_norm": 2.1552276611328125, "learning_rate": 1.6830439101152513e-05, "loss": 1.2436, "step": 1070 }, { "epoch": 0.8105960264900662, "grad_norm": 2.5437731742858887, "learning_rate": 1.682457333909566e-05, "loss": 1.2039, "step": 1071 }, { "epoch": 0.811352885525071, "grad_norm": 2.1334102153778076, "learning_rate": 1.6818703178667496e-05, "loss": 1.2173, "step": 1072 }, { "epoch": 0.8121097445600757, "grad_norm": 2.5743660926818848, "learning_rate": 1.6812828623651832e-05, "loss": 1.2132, "step": 1073 }, { "epoch": 0.8128666035950805, "grad_norm": 2.4903461933135986, "learning_rate": 1.6806949677835328e-05, "loss": 1.2428, "step": 1074 }, { "epoch": 0.8136234626300851, "grad_norm": 2.2703421115875244, "learning_rate": 1.6801066345007447e-05, "loss": 1.1828, "step": 1075 }, { "epoch": 0.8143803216650899, "grad_norm": 2.615246295928955, "learning_rate": 1.6795178628960508e-05, "loss": 1.2361, "step": 1076 }, { "epoch": 0.8151371807000946, "grad_norm": 2.6063549518585205, "learning_rate": 1.6789286533489635e-05, "loss": 1.2586, "step": 1077 }, { "epoch": 0.8158940397350993, "grad_norm": 2.725470542907715, "learning_rate": 1.6783390062392788e-05, "loss": 1.2166, "step": 1078 }, { "epoch": 0.8166508987701041, "grad_norm": 2.576597213745117, "learning_rate": 1.6777489219470743e-05, "loss": 1.231, "step": 1079 }, { "epoch": 0.8174077578051088, "grad_norm": 2.37703537940979, "learning_rate": 1.677158400852708e-05, "loss": 1.2663, "step": 1080 }, { "epoch": 0.8181646168401135, "grad_norm": 2.3021481037139893, "learning_rate": 1.6765674433368232e-05, "loss": 1.2091, "step": 1081 }, { "epoch": 0.8189214758751182, "grad_norm": 2.4437525272369385, "learning_rate": 1.67597604978034e-05, "loss": 1.2123, "step": 1082 }, { "epoch": 0.819678334910123, "grad_norm": 2.470407724380493, "learning_rate": 1.6753842205644628e-05, "loss": 1.1948, "step": 1083 }, { "epoch": 0.8204351939451278, "grad_norm": 2.5628767013549805, "learning_rate": 1.6747919560706752e-05, "loss": 1.2347, "step": 1084 }, { "epoch": 0.8211920529801324, "grad_norm": 2.5520646572113037, "learning_rate": 1.6741992566807416e-05, "loss": 1.2319, "step": 1085 }, { "epoch": 0.8219489120151372, "grad_norm": 2.4275975227355957, "learning_rate": 1.673606122776708e-05, "loss": 1.1666, "step": 1086 }, { "epoch": 0.8227057710501419, "grad_norm": 2.803802728652954, "learning_rate": 1.6730125547408984e-05, "loss": 1.1861, "step": 1087 }, { "epoch": 0.8234626300851466, "grad_norm": 3.4586920738220215, "learning_rate": 1.6724185529559185e-05, "loss": 1.2278, "step": 1088 }, { "epoch": 0.8242194891201514, "grad_norm": 2.3933305740356445, "learning_rate": 1.6718241178046526e-05, "loss": 1.2148, "step": 1089 }, { "epoch": 0.8249763481551561, "grad_norm": 2.64758038520813, "learning_rate": 1.671229249670264e-05, "loss": 1.2138, "step": 1090 }, { "epoch": 0.8257332071901609, "grad_norm": 2.389108896255493, "learning_rate": 1.6706339489361962e-05, "loss": 1.2295, "step": 1091 }, { "epoch": 0.8264900662251655, "grad_norm": 2.5130155086517334, "learning_rate": 1.6700382159861705e-05, "loss": 1.196, "step": 1092 }, { "epoch": 0.8272469252601703, "grad_norm": 2.287849187850952, "learning_rate": 1.6694420512041878e-05, "loss": 1.2286, "step": 1093 }, { "epoch": 0.8280037842951751, "grad_norm": 2.378422737121582, "learning_rate": 1.6688454549745263e-05, "loss": 1.2518, "step": 1094 }, { "epoch": 0.8287606433301797, "grad_norm": 2.3797566890716553, "learning_rate": 1.6682484276817433e-05, "loss": 1.2228, "step": 1095 }, { "epoch": 0.8295175023651845, "grad_norm": 2.276672124862671, "learning_rate": 1.667650969710673e-05, "loss": 1.2281, "step": 1096 }, { "epoch": 0.8302743614001892, "grad_norm": 2.550900459289551, "learning_rate": 1.6670530814464284e-05, "loss": 1.2279, "step": 1097 }, { "epoch": 0.831031220435194, "grad_norm": 2.5035128593444824, "learning_rate": 1.6664547632743987e-05, "loss": 1.1628, "step": 1098 }, { "epoch": 0.8317880794701987, "grad_norm": 2.664567708969116, "learning_rate": 1.6658560155802506e-05, "loss": 1.187, "step": 1099 }, { "epoch": 0.8325449385052034, "grad_norm": 2.5373306274414062, "learning_rate": 1.665256838749928e-05, "loss": 1.2422, "step": 1100 }, { "epoch": 0.8333017975402082, "grad_norm": 2.7911324501037598, "learning_rate": 1.664657233169651e-05, "loss": 1.1982, "step": 1101 }, { "epoch": 0.8340586565752128, "grad_norm": 2.663367509841919, "learning_rate": 1.664057199225916e-05, "loss": 1.2578, "step": 1102 }, { "epoch": 0.8348155156102176, "grad_norm": 2.486424684524536, "learning_rate": 1.663456737305496e-05, "loss": 1.2106, "step": 1103 }, { "epoch": 0.8355723746452224, "grad_norm": 2.503634214401245, "learning_rate": 1.66285584779544e-05, "loss": 1.2192, "step": 1104 }, { "epoch": 0.836329233680227, "grad_norm": 2.679033041000366, "learning_rate": 1.6622545310830712e-05, "loss": 1.204, "step": 1105 }, { "epoch": 0.8370860927152318, "grad_norm": 2.7814950942993164, "learning_rate": 1.66165278755599e-05, "loss": 1.2133, "step": 1106 }, { "epoch": 0.8378429517502365, "grad_norm": 2.5719947814941406, "learning_rate": 1.6610506176020707e-05, "loss": 1.2457, "step": 1107 }, { "epoch": 0.8385998107852413, "grad_norm": 3.662503957748413, "learning_rate": 1.660448021609463e-05, "loss": 1.2288, "step": 1108 }, { "epoch": 0.8393566698202459, "grad_norm": 2.62904691696167, "learning_rate": 1.659844999966591e-05, "loss": 1.2202, "step": 1109 }, { "epoch": 0.8401135288552507, "grad_norm": 2.6756417751312256, "learning_rate": 1.659241553062154e-05, "loss": 1.2438, "step": 1110 }, { "epoch": 0.8408703878902555, "grad_norm": 2.762983798980713, "learning_rate": 1.6586376812851233e-05, "loss": 1.1499, "step": 1111 }, { "epoch": 0.8416272469252601, "grad_norm": 2.4654974937438965, "learning_rate": 1.6580333850247462e-05, "loss": 1.2398, "step": 1112 }, { "epoch": 0.8423841059602649, "grad_norm": 2.5800747871398926, "learning_rate": 1.657428664670543e-05, "loss": 1.2564, "step": 1113 }, { "epoch": 0.8431409649952696, "grad_norm": 2.4179458618164062, "learning_rate": 1.6568235206123073e-05, "loss": 1.1874, "step": 1114 }, { "epoch": 0.8438978240302744, "grad_norm": 2.4252541065216064, "learning_rate": 1.6562179532401053e-05, "loss": 1.2568, "step": 1115 }, { "epoch": 0.8446546830652791, "grad_norm": 2.5447540283203125, "learning_rate": 1.6556119629442764e-05, "loss": 1.1884, "step": 1116 }, { "epoch": 0.8454115421002838, "grad_norm": 2.5056309700012207, "learning_rate": 1.655005550115433e-05, "loss": 1.2665, "step": 1117 }, { "epoch": 0.8461684011352886, "grad_norm": 2.7429358959198, "learning_rate": 1.65439871514446e-05, "loss": 1.1835, "step": 1118 }, { "epoch": 0.8469252601702932, "grad_norm": 2.6835551261901855, "learning_rate": 1.653791458422513e-05, "loss": 1.2781, "step": 1119 }, { "epoch": 0.847682119205298, "grad_norm": 2.8627474308013916, "learning_rate": 1.653183780341021e-05, "loss": 1.1931, "step": 1120 }, { "epoch": 0.8484389782403028, "grad_norm": 2.758310556411743, "learning_rate": 1.652575681291684e-05, "loss": 1.1944, "step": 1121 }, { "epoch": 0.8491958372753075, "grad_norm": 2.761715888977051, "learning_rate": 1.6519671616664734e-05, "loss": 1.2457, "step": 1122 }, { "epoch": 0.8499526963103122, "grad_norm": 2.8214142322540283, "learning_rate": 1.6513582218576315e-05, "loss": 1.2203, "step": 1123 }, { "epoch": 0.8507095553453169, "grad_norm": 2.6501047611236572, "learning_rate": 1.6507488622576712e-05, "loss": 1.2591, "step": 1124 }, { "epoch": 0.8514664143803217, "grad_norm": 2.4939935207366943, "learning_rate": 1.6501390832593777e-05, "loss": 1.2205, "step": 1125 }, { "epoch": 0.8522232734153264, "grad_norm": 2.5232126712799072, "learning_rate": 1.6495288852558036e-05, "loss": 1.2055, "step": 1126 }, { "epoch": 0.8529801324503311, "grad_norm": 2.805695056915283, "learning_rate": 1.6489182686402753e-05, "loss": 1.2069, "step": 1127 }, { "epoch": 0.8537369914853359, "grad_norm": 2.588597059249878, "learning_rate": 1.6483072338063844e-05, "loss": 1.1991, "step": 1128 }, { "epoch": 0.8544938505203405, "grad_norm": 2.632336378097534, "learning_rate": 1.6476957811479966e-05, "loss": 1.2556, "step": 1129 }, { "epoch": 0.8552507095553453, "grad_norm": 2.6084611415863037, "learning_rate": 1.6470839110592445e-05, "loss": 1.2257, "step": 1130 }, { "epoch": 0.8560075685903501, "grad_norm": 2.4824182987213135, "learning_rate": 1.6464716239345296e-05, "loss": 1.1898, "step": 1131 }, { "epoch": 0.8567644276253548, "grad_norm": 2.5742006301879883, "learning_rate": 1.6458589201685235e-05, "loss": 1.2024, "step": 1132 }, { "epoch": 0.8575212866603595, "grad_norm": 2.2470591068267822, "learning_rate": 1.6452458001561655e-05, "loss": 1.2154, "step": 1133 }, { "epoch": 0.8582781456953642, "grad_norm": 2.5146355628967285, "learning_rate": 1.6446322642926636e-05, "loss": 1.2202, "step": 1134 }, { "epoch": 0.859035004730369, "grad_norm": 2.580735683441162, "learning_rate": 1.644018312973493e-05, "loss": 1.1595, "step": 1135 }, { "epoch": 0.8597918637653738, "grad_norm": 2.558544635772705, "learning_rate": 1.6434039465943984e-05, "loss": 1.2048, "step": 1136 }, { "epoch": 0.8605487228003784, "grad_norm": 2.5136754512786865, "learning_rate": 1.64278916555139e-05, "loss": 1.2003, "step": 1137 }, { "epoch": 0.8613055818353832, "grad_norm": 2.7524209022521973, "learning_rate": 1.6421739702407468e-05, "loss": 1.1862, "step": 1138 }, { "epoch": 0.8620624408703879, "grad_norm": 2.519251585006714, "learning_rate": 1.6415583610590144e-05, "loss": 1.2471, "step": 1139 }, { "epoch": 0.8628192999053926, "grad_norm": 2.7237823009490967, "learning_rate": 1.6409423384030046e-05, "loss": 1.1878, "step": 1140 }, { "epoch": 0.8635761589403973, "grad_norm": 2.595668315887451, "learning_rate": 1.6403259026697967e-05, "loss": 1.2164, "step": 1141 }, { "epoch": 0.8643330179754021, "grad_norm": 2.6703858375549316, "learning_rate": 1.6397090542567356e-05, "loss": 1.1944, "step": 1142 }, { "epoch": 0.8650898770104068, "grad_norm": 2.558354139328003, "learning_rate": 1.639091793561432e-05, "loss": 1.2423, "step": 1143 }, { "epoch": 0.8658467360454115, "grad_norm": 2.445343494415283, "learning_rate": 1.6384741209817638e-05, "loss": 1.1989, "step": 1144 }, { "epoch": 0.8666035950804163, "grad_norm": 2.3659980297088623, "learning_rate": 1.6378560369158724e-05, "loss": 1.1969, "step": 1145 }, { "epoch": 0.867360454115421, "grad_norm": 2.6195647716522217, "learning_rate": 1.6372375417621654e-05, "loss": 1.2012, "step": 1146 }, { "epoch": 0.8681173131504257, "grad_norm": 2.528627395629883, "learning_rate": 1.6366186359193155e-05, "loss": 1.2365, "step": 1147 }, { "epoch": 0.8688741721854305, "grad_norm": 2.360337734222412, "learning_rate": 1.6359993197862604e-05, "loss": 1.2192, "step": 1148 }, { "epoch": 0.8696310312204352, "grad_norm": 2.3621413707733154, "learning_rate": 1.635379593762201e-05, "loss": 1.2015, "step": 1149 }, { "epoch": 0.8703878902554399, "grad_norm": 2.5273406505584717, "learning_rate": 1.6347594582466038e-05, "loss": 1.187, "step": 1150 }, { "epoch": 0.8711447492904446, "grad_norm": 2.8172874450683594, "learning_rate": 1.6341389136391985e-05, "loss": 1.2271, "step": 1151 }, { "epoch": 0.8719016083254494, "grad_norm": 2.3418102264404297, "learning_rate": 1.6335179603399788e-05, "loss": 1.2358, "step": 1152 }, { "epoch": 0.8726584673604542, "grad_norm": 2.415493965148926, "learning_rate": 1.632896598749202e-05, "loss": 1.2717, "step": 1153 }, { "epoch": 0.8734153263954588, "grad_norm": 2.3262200355529785, "learning_rate": 1.6322748292673875e-05, "loss": 1.2198, "step": 1154 }, { "epoch": 0.8741721854304636, "grad_norm": 2.8730580806732178, "learning_rate": 1.6316526522953195e-05, "loss": 1.183, "step": 1155 }, { "epoch": 0.8749290444654683, "grad_norm": 2.432713508605957, "learning_rate": 1.631030068234043e-05, "loss": 1.2093, "step": 1156 }, { "epoch": 0.875685903500473, "grad_norm": 2.6007068157196045, "learning_rate": 1.630407077484866e-05, "loss": 1.2506, "step": 1157 }, { "epoch": 0.8764427625354778, "grad_norm": 2.785717487335205, "learning_rate": 1.6297836804493598e-05, "loss": 1.2073, "step": 1158 }, { "epoch": 0.8771996215704825, "grad_norm": 2.498161792755127, "learning_rate": 1.629159877529356e-05, "loss": 1.2297, "step": 1159 }, { "epoch": 0.8779564806054873, "grad_norm": 2.6516387462615967, "learning_rate": 1.628535669126948e-05, "loss": 1.2242, "step": 1160 }, { "epoch": 0.8787133396404919, "grad_norm": 2.164231300354004, "learning_rate": 1.627911055644492e-05, "loss": 1.242, "step": 1161 }, { "epoch": 0.8794701986754967, "grad_norm": 2.1503818035125732, "learning_rate": 1.6272860374846037e-05, "loss": 1.2187, "step": 1162 }, { "epoch": 0.8802270577105015, "grad_norm": 1.9819633960723877, "learning_rate": 1.6266606150501608e-05, "loss": 1.2044, "step": 1163 }, { "epoch": 0.8809839167455061, "grad_norm": 2.251472234725952, "learning_rate": 1.6260347887443e-05, "loss": 1.2262, "step": 1164 }, { "epoch": 0.8817407757805109, "grad_norm": 2.2765519618988037, "learning_rate": 1.625408558970421e-05, "loss": 1.185, "step": 1165 }, { "epoch": 0.8824976348155156, "grad_norm": 2.4048166275024414, "learning_rate": 1.6247819261321803e-05, "loss": 1.1973, "step": 1166 }, { "epoch": 0.8832544938505204, "grad_norm": 2.234778881072998, "learning_rate": 1.624154890633497e-05, "loss": 1.1795, "step": 1167 }, { "epoch": 0.8840113528855251, "grad_norm": 2.2173893451690674, "learning_rate": 1.623527452878548e-05, "loss": 1.1897, "step": 1168 }, { "epoch": 0.8847682119205298, "grad_norm": 2.3917415142059326, "learning_rate": 1.6228996132717702e-05, "loss": 1.2425, "step": 1169 }, { "epoch": 0.8855250709555346, "grad_norm": 2.254404306411743, "learning_rate": 1.62227137221786e-05, "loss": 1.2495, "step": 1170 }, { "epoch": 0.8862819299905392, "grad_norm": 2.1990530490875244, "learning_rate": 1.6216427301217713e-05, "loss": 1.1757, "step": 1171 }, { "epoch": 0.887038789025544, "grad_norm": 2.3781630992889404, "learning_rate": 1.6210136873887176e-05, "loss": 1.2387, "step": 1172 }, { "epoch": 0.8877956480605487, "grad_norm": 2.1774098873138428, "learning_rate": 1.6203842444241703e-05, "loss": 1.1937, "step": 1173 }, { "epoch": 0.8885525070955534, "grad_norm": 2.3653695583343506, "learning_rate": 1.619754401633858e-05, "loss": 1.2115, "step": 1174 }, { "epoch": 0.8893093661305582, "grad_norm": 2.3032443523406982, "learning_rate": 1.619124159423769e-05, "loss": 1.1802, "step": 1175 }, { "epoch": 0.8900662251655629, "grad_norm": 2.2687666416168213, "learning_rate": 1.618493518200147e-05, "loss": 1.1868, "step": 1176 }, { "epoch": 0.8908230842005677, "grad_norm": 2.3060355186462402, "learning_rate": 1.6178624783694937e-05, "loss": 1.1933, "step": 1177 }, { "epoch": 0.8915799432355723, "grad_norm": 3.4064903259277344, "learning_rate": 1.6172310403385677e-05, "loss": 1.234, "step": 1178 }, { "epoch": 0.8923368022705771, "grad_norm": 2.568434715270996, "learning_rate": 1.616599204514385e-05, "loss": 1.2115, "step": 1179 }, { "epoch": 0.8930936613055819, "grad_norm": 2.2627182006835938, "learning_rate": 1.6159669713042166e-05, "loss": 1.2229, "step": 1180 }, { "epoch": 0.8938505203405865, "grad_norm": 2.2551677227020264, "learning_rate": 1.615334341115591e-05, "loss": 1.2481, "step": 1181 }, { "epoch": 0.8946073793755913, "grad_norm": 2.4630017280578613, "learning_rate": 1.6147013143562915e-05, "loss": 1.1769, "step": 1182 }, { "epoch": 0.895364238410596, "grad_norm": 2.2972466945648193, "learning_rate": 1.6140678914343575e-05, "loss": 1.2028, "step": 1183 }, { "epoch": 0.8961210974456008, "grad_norm": 2.343468189239502, "learning_rate": 1.6134340727580843e-05, "loss": 1.2356, "step": 1184 }, { "epoch": 0.8968779564806055, "grad_norm": 2.2180895805358887, "learning_rate": 1.6127998587360208e-05, "loss": 1.2082, "step": 1185 }, { "epoch": 0.8976348155156102, "grad_norm": 2.121718406677246, "learning_rate": 1.6121652497769727e-05, "loss": 1.2052, "step": 1186 }, { "epoch": 0.898391674550615, "grad_norm": 2.2796201705932617, "learning_rate": 1.6115302462899982e-05, "loss": 1.2056, "step": 1187 }, { "epoch": 0.8991485335856196, "grad_norm": 2.1909053325653076, "learning_rate": 1.6108948486844118e-05, "loss": 1.1556, "step": 1188 }, { "epoch": 0.8999053926206244, "grad_norm": 2.4068331718444824, "learning_rate": 1.610259057369781e-05, "loss": 1.2258, "step": 1189 }, { "epoch": 0.9006622516556292, "grad_norm": 2.168159246444702, "learning_rate": 1.6096228727559265e-05, "loss": 1.1805, "step": 1190 }, { "epoch": 0.9014191106906339, "grad_norm": 2.129645586013794, "learning_rate": 1.608986295252924e-05, "loss": 1.1725, "step": 1191 }, { "epoch": 0.9021759697256386, "grad_norm": 2.3025479316711426, "learning_rate": 1.608349325271101e-05, "loss": 1.1873, "step": 1192 }, { "epoch": 0.9029328287606433, "grad_norm": 2.3402655124664307, "learning_rate": 1.607711963221039e-05, "loss": 1.2049, "step": 1193 }, { "epoch": 0.9036896877956481, "grad_norm": 2.844715118408203, "learning_rate": 1.6070742095135722e-05, "loss": 1.1654, "step": 1194 }, { "epoch": 0.9044465468306528, "grad_norm": 2.337291717529297, "learning_rate": 1.6064360645597862e-05, "loss": 1.2119, "step": 1195 }, { "epoch": 0.9052034058656575, "grad_norm": 2.2666563987731934, "learning_rate": 1.60579752877102e-05, "loss": 1.1662, "step": 1196 }, { "epoch": 0.9059602649006623, "grad_norm": 2.2546000480651855, "learning_rate": 1.6051586025588634e-05, "loss": 1.1612, "step": 1197 }, { "epoch": 0.906717123935667, "grad_norm": 2.4789252281188965, "learning_rate": 1.6045192863351594e-05, "loss": 1.159, "step": 1198 }, { "epoch": 0.9074739829706717, "grad_norm": 2.5757458209991455, "learning_rate": 1.6038795805120005e-05, "loss": 1.1359, "step": 1199 }, { "epoch": 0.9082308420056765, "grad_norm": 2.5025620460510254, "learning_rate": 1.603239485501732e-05, "loss": 1.2513, "step": 1200 }, { "epoch": 0.9089877010406812, "grad_norm": 2.6904783248901367, "learning_rate": 1.6025990017169495e-05, "loss": 1.2275, "step": 1201 }, { "epoch": 0.9097445600756859, "grad_norm": 2.121021270751953, "learning_rate": 1.6019581295704985e-05, "loss": 1.2138, "step": 1202 }, { "epoch": 0.9105014191106906, "grad_norm": 2.2942512035369873, "learning_rate": 1.601316869475476e-05, "loss": 1.207, "step": 1203 }, { "epoch": 0.9112582781456954, "grad_norm": 2.4668707847595215, "learning_rate": 1.6006752218452283e-05, "loss": 1.2422, "step": 1204 }, { "epoch": 0.9120151371807, "grad_norm": 2.536863088607788, "learning_rate": 1.600033187093351e-05, "loss": 1.2256, "step": 1205 }, { "epoch": 0.9127719962157048, "grad_norm": 3.008856773376465, "learning_rate": 1.599390765633691e-05, "loss": 1.2129, "step": 1206 }, { "epoch": 0.9135288552507096, "grad_norm": 2.292177438735962, "learning_rate": 1.5987479578803425e-05, "loss": 1.2237, "step": 1207 }, { "epoch": 0.9142857142857143, "grad_norm": 2.4802656173706055, "learning_rate": 1.59810476424765e-05, "loss": 1.2307, "step": 1208 }, { "epoch": 0.915042573320719, "grad_norm": 2.225219964981079, "learning_rate": 1.5974611851502064e-05, "loss": 1.1845, "step": 1209 }, { "epoch": 0.9157994323557237, "grad_norm": 2.584470272064209, "learning_rate": 1.5968172210028525e-05, "loss": 1.1756, "step": 1210 }, { "epoch": 0.9165562913907285, "grad_norm": 2.3518307209014893, "learning_rate": 1.596172872220679e-05, "loss": 1.1788, "step": 1211 }, { "epoch": 0.9173131504257332, "grad_norm": 2.3497278690338135, "learning_rate": 1.595528139219021e-05, "loss": 1.2084, "step": 1212 }, { "epoch": 0.9180700094607379, "grad_norm": 2.117664337158203, "learning_rate": 1.594883022413466e-05, "loss": 1.1765, "step": 1213 }, { "epoch": 0.9188268684957427, "grad_norm": 2.1322619915008545, "learning_rate": 1.594237522219845e-05, "loss": 1.1835, "step": 1214 }, { "epoch": 0.9195837275307474, "grad_norm": 2.0177836418151855, "learning_rate": 1.5935916390542377e-05, "loss": 1.178, "step": 1215 }, { "epoch": 0.9203405865657521, "grad_norm": 2.2327425479888916, "learning_rate": 1.5929453733329713e-05, "loss": 1.1916, "step": 1216 }, { "epoch": 0.9210974456007569, "grad_norm": 2.168905258178711, "learning_rate": 1.592298725472618e-05, "loss": 1.2139, "step": 1217 }, { "epoch": 0.9218543046357616, "grad_norm": 2.275158166885376, "learning_rate": 1.591651695889998e-05, "loss": 1.2014, "step": 1218 }, { "epoch": 0.9226111636707663, "grad_norm": 2.153704881668091, "learning_rate": 1.5910042850021754e-05, "loss": 1.2219, "step": 1219 }, { "epoch": 0.923368022705771, "grad_norm": 2.161616802215576, "learning_rate": 1.5903564932264624e-05, "loss": 1.2452, "step": 1220 }, { "epoch": 0.9241248817407758, "grad_norm": 2.1606664657592773, "learning_rate": 1.589708320980416e-05, "loss": 1.1448, "step": 1221 }, { "epoch": 0.9248817407757806, "grad_norm": 2.040039300918579, "learning_rate": 1.589059768681837e-05, "loss": 1.235, "step": 1222 }, { "epoch": 0.9256385998107852, "grad_norm": 2.2927193641662598, "learning_rate": 1.5884108367487732e-05, "loss": 1.19, "step": 1223 }, { "epoch": 0.92639545884579, "grad_norm": 2.2096221446990967, "learning_rate": 1.587761525599516e-05, "loss": 1.2349, "step": 1224 }, { "epoch": 0.9271523178807947, "grad_norm": 2.1982614994049072, "learning_rate": 1.5871118356526017e-05, "loss": 1.206, "step": 1225 }, { "epoch": 0.9279091769157994, "grad_norm": 2.2477710247039795, "learning_rate": 1.5864617673268096e-05, "loss": 1.2044, "step": 1226 }, { "epoch": 0.9286660359508042, "grad_norm": 2.126891851425171, "learning_rate": 1.5858113210411646e-05, "loss": 1.1685, "step": 1227 }, { "epoch": 0.9294228949858089, "grad_norm": 2.6382102966308594, "learning_rate": 1.585160497214935e-05, "loss": 1.2247, "step": 1228 }, { "epoch": 0.9301797540208137, "grad_norm": 2.1951191425323486, "learning_rate": 1.5845092962676306e-05, "loss": 1.1517, "step": 1229 }, { "epoch": 0.9309366130558183, "grad_norm": 2.299997091293335, "learning_rate": 1.5838577186190064e-05, "loss": 1.2327, "step": 1230 }, { "epoch": 0.9316934720908231, "grad_norm": 2.69441556930542, "learning_rate": 1.5832057646890594e-05, "loss": 1.1622, "step": 1231 }, { "epoch": 0.9324503311258279, "grad_norm": 2.3268439769744873, "learning_rate": 1.582553434898029e-05, "loss": 1.2181, "step": 1232 }, { "epoch": 0.9332071901608325, "grad_norm": 2.1350252628326416, "learning_rate": 1.5819007296663974e-05, "loss": 1.1818, "step": 1233 }, { "epoch": 0.9339640491958373, "grad_norm": 2.0754928588867188, "learning_rate": 1.5812476494148876e-05, "loss": 1.1847, "step": 1234 }, { "epoch": 0.934720908230842, "grad_norm": 2.48238205909729, "learning_rate": 1.5805941945644658e-05, "loss": 1.222, "step": 1235 }, { "epoch": 0.9354777672658467, "grad_norm": 2.202993154525757, "learning_rate": 1.579940365536339e-05, "loss": 1.2381, "step": 1236 }, { "epoch": 0.9362346263008514, "grad_norm": 2.424055337905884, "learning_rate": 1.5792861627519554e-05, "loss": 1.2035, "step": 1237 }, { "epoch": 0.9369914853358562, "grad_norm": 2.270042896270752, "learning_rate": 1.578631586633004e-05, "loss": 1.2268, "step": 1238 }, { "epoch": 0.937748344370861, "grad_norm": 2.38864803314209, "learning_rate": 1.5779766376014146e-05, "loss": 1.2202, "step": 1239 }, { "epoch": 0.9385052034058656, "grad_norm": 2.137854814529419, "learning_rate": 1.5773213160793574e-05, "loss": 1.2246, "step": 1240 }, { "epoch": 0.9392620624408704, "grad_norm": 2.5035834312438965, "learning_rate": 1.5766656224892424e-05, "loss": 1.2246, "step": 1241 }, { "epoch": 0.9400189214758751, "grad_norm": 2.498552083969116, "learning_rate": 1.5760095572537207e-05, "loss": 1.2037, "step": 1242 }, { "epoch": 0.9407757805108798, "grad_norm": 2.0278542041778564, "learning_rate": 1.5753531207956806e-05, "loss": 1.2197, "step": 1243 }, { "epoch": 0.9415326395458846, "grad_norm": 2.1201868057250977, "learning_rate": 1.5746963135382522e-05, "loss": 1.1557, "step": 1244 }, { "epoch": 0.9422894985808893, "grad_norm": 2.480867385864258, "learning_rate": 1.574039135904802e-05, "loss": 1.2006, "step": 1245 }, { "epoch": 0.9430463576158941, "grad_norm": 2.257807970046997, "learning_rate": 1.573381588318938e-05, "loss": 1.2235, "step": 1246 }, { "epoch": 0.9438032166508987, "grad_norm": 2.2047722339630127, "learning_rate": 1.5727236712045053e-05, "loss": 1.1904, "step": 1247 }, { "epoch": 0.9445600756859035, "grad_norm": 2.2862167358398438, "learning_rate": 1.5720653849855862e-05, "loss": 1.2388, "step": 1248 }, { "epoch": 0.9453169347209083, "grad_norm": 2.2071452140808105, "learning_rate": 1.571406730086503e-05, "loss": 1.1967, "step": 1249 }, { "epoch": 0.9460737937559129, "grad_norm": 2.1805355548858643, "learning_rate": 1.5707477069318143e-05, "loss": 1.1927, "step": 1250 }, { "epoch": 0.9468306527909177, "grad_norm": 2.156611204147339, "learning_rate": 1.5700883159463162e-05, "loss": 1.2216, "step": 1251 }, { "epoch": 0.9475875118259224, "grad_norm": 2.2290961742401123, "learning_rate": 1.5694285575550416e-05, "loss": 1.2116, "step": 1252 }, { "epoch": 0.9483443708609272, "grad_norm": 2.0691416263580322, "learning_rate": 1.568768432183262e-05, "loss": 1.2077, "step": 1253 }, { "epoch": 0.9491012298959319, "grad_norm": 2.2860946655273438, "learning_rate": 1.568107940256483e-05, "loss": 1.2392, "step": 1254 }, { "epoch": 0.9498580889309366, "grad_norm": 2.3357367515563965, "learning_rate": 1.567447082200448e-05, "loss": 1.2469, "step": 1255 }, { "epoch": 0.9506149479659414, "grad_norm": 2.224269151687622, "learning_rate": 1.566785858441136e-05, "loss": 1.2553, "step": 1256 }, { "epoch": 0.951371807000946, "grad_norm": 2.274747133255005, "learning_rate": 1.566124269404762e-05, "loss": 1.1486, "step": 1257 }, { "epoch": 0.9521286660359508, "grad_norm": 2.205291271209717, "learning_rate": 1.5654623155177758e-05, "loss": 1.228, "step": 1258 }, { "epoch": 0.9528855250709556, "grad_norm": 2.059138774871826, "learning_rate": 1.564799997206863e-05, "loss": 1.152, "step": 1259 }, { "epoch": 0.9536423841059603, "grad_norm": 2.382854700088501, "learning_rate": 1.564137314898944e-05, "loss": 1.2262, "step": 1260 }, { "epoch": 0.954399243140965, "grad_norm": 2.1926519870758057, "learning_rate": 1.563474269021174e-05, "loss": 1.1439, "step": 1261 }, { "epoch": 0.9551561021759697, "grad_norm": 2.247835159301758, "learning_rate": 1.5628108600009414e-05, "loss": 1.2191, "step": 1262 }, { "epoch": 0.9559129612109745, "grad_norm": 2.6202445030212402, "learning_rate": 1.5621470882658696e-05, "loss": 1.1955, "step": 1263 }, { "epoch": 0.9566698202459792, "grad_norm": 2.1109254360198975, "learning_rate": 1.5614829542438162e-05, "loss": 1.2208, "step": 1264 }, { "epoch": 0.9574266792809839, "grad_norm": 2.0421035289764404, "learning_rate": 1.5608184583628723e-05, "loss": 1.2597, "step": 1265 }, { "epoch": 0.9581835383159887, "grad_norm": 2.3527796268463135, "learning_rate": 1.5601536010513608e-05, "loss": 1.2437, "step": 1266 }, { "epoch": 0.9589403973509933, "grad_norm": 2.39426851272583, "learning_rate": 1.559488382737839e-05, "loss": 1.2418, "step": 1267 }, { "epoch": 0.9596972563859981, "grad_norm": 2.546283483505249, "learning_rate": 1.558822803851097e-05, "loss": 1.2295, "step": 1268 }, { "epoch": 0.9604541154210028, "grad_norm": 2.275153160095215, "learning_rate": 1.558156864820156e-05, "loss": 1.1896, "step": 1269 }, { "epoch": 0.9612109744560076, "grad_norm": 2.1879630088806152, "learning_rate": 1.5574905660742707e-05, "loss": 1.1766, "step": 1270 }, { "epoch": 0.9619678334910123, "grad_norm": 2.3438518047332764, "learning_rate": 1.556823908042927e-05, "loss": 1.1828, "step": 1271 }, { "epoch": 0.962724692526017, "grad_norm": 2.673069477081299, "learning_rate": 1.5561568911558422e-05, "loss": 1.1909, "step": 1272 }, { "epoch": 0.9634815515610218, "grad_norm": 2.3552541732788086, "learning_rate": 1.5554895158429654e-05, "loss": 1.2246, "step": 1273 }, { "epoch": 0.9642384105960264, "grad_norm": 2.1169943809509277, "learning_rate": 1.5548217825344765e-05, "loss": 1.2018, "step": 1274 }, { "epoch": 0.9649952696310312, "grad_norm": 2.1841084957122803, "learning_rate": 1.5541536916607863e-05, "loss": 1.1496, "step": 1275 }, { "epoch": 0.965752128666036, "grad_norm": 2.1429550647735596, "learning_rate": 1.553485243652536e-05, "loss": 1.1537, "step": 1276 }, { "epoch": 0.9665089877010407, "grad_norm": 2.6029670238494873, "learning_rate": 1.5528164389405972e-05, "loss": 1.2348, "step": 1277 }, { "epoch": 0.9672658467360454, "grad_norm": 2.105222463607788, "learning_rate": 1.5521472779560705e-05, "loss": 1.217, "step": 1278 }, { "epoch": 0.9680227057710501, "grad_norm": 2.1541764736175537, "learning_rate": 1.5514777611302875e-05, "loss": 1.1844, "step": 1279 }, { "epoch": 0.9687795648060549, "grad_norm": 2.0249156951904297, "learning_rate": 1.5508078888948086e-05, "loss": 1.2191, "step": 1280 }, { "epoch": 0.9695364238410596, "grad_norm": 2.187110185623169, "learning_rate": 1.550137661681423e-05, "loss": 1.1811, "step": 1281 }, { "epoch": 0.9702932828760643, "grad_norm": 2.27626371383667, "learning_rate": 1.5494670799221485e-05, "loss": 1.186, "step": 1282 }, { "epoch": 0.9710501419110691, "grad_norm": 2.0355005264282227, "learning_rate": 1.5487961440492327e-05, "loss": 1.2338, "step": 1283 }, { "epoch": 0.9718070009460738, "grad_norm": 2.126351833343506, "learning_rate": 1.54812485449515e-05, "loss": 1.2129, "step": 1284 }, { "epoch": 0.9725638599810785, "grad_norm": 2.150451421737671, "learning_rate": 1.5474532116926037e-05, "loss": 1.1812, "step": 1285 }, { "epoch": 0.9733207190160833, "grad_norm": 2.0796091556549072, "learning_rate": 1.5467812160745245e-05, "loss": 1.2273, "step": 1286 }, { "epoch": 0.974077578051088, "grad_norm": 2.349214792251587, "learning_rate": 1.5461088680740702e-05, "loss": 1.2286, "step": 1287 }, { "epoch": 0.9748344370860927, "grad_norm": 2.1848902702331543, "learning_rate": 1.545436168124627e-05, "loss": 1.2239, "step": 1288 }, { "epoch": 0.9755912961210974, "grad_norm": 2.261702299118042, "learning_rate": 1.544763116659806e-05, "loss": 1.202, "step": 1289 }, { "epoch": 0.9763481551561022, "grad_norm": 2.2427971363067627, "learning_rate": 1.5440897141134464e-05, "loss": 1.2133, "step": 1290 }, { "epoch": 0.977105014191107, "grad_norm": 2.076875686645508, "learning_rate": 1.5434159609196128e-05, "loss": 1.2056, "step": 1291 }, { "epoch": 0.9778618732261116, "grad_norm": 2.26599383354187, "learning_rate": 1.542741857512597e-05, "loss": 1.195, "step": 1292 }, { "epoch": 0.9786187322611164, "grad_norm": 2.262747049331665, "learning_rate": 1.5420674043269152e-05, "loss": 1.2286, "step": 1293 }, { "epoch": 0.9793755912961211, "grad_norm": 2.1384646892547607, "learning_rate": 1.5413926017973097e-05, "loss": 1.1843, "step": 1294 }, { "epoch": 0.9801324503311258, "grad_norm": 2.3019633293151855, "learning_rate": 1.540717450358748e-05, "loss": 1.2474, "step": 1295 }, { "epoch": 0.9808893093661306, "grad_norm": 2.6212801933288574, "learning_rate": 1.5400419504464222e-05, "loss": 1.2305, "step": 1296 }, { "epoch": 0.9816461684011353, "grad_norm": 2.415092706680298, "learning_rate": 1.5393661024957495e-05, "loss": 1.2394, "step": 1297 }, { "epoch": 0.98240302743614, "grad_norm": 2.392845392227173, "learning_rate": 1.5386899069423712e-05, "loss": 1.1971, "step": 1298 }, { "epoch": 0.9831598864711447, "grad_norm": 2.291163206100464, "learning_rate": 1.5380133642221525e-05, "loss": 1.2004, "step": 1299 }, { "epoch": 0.9839167455061495, "grad_norm": 2.191312313079834, "learning_rate": 1.5373364747711825e-05, "loss": 1.1677, "step": 1300 }, { "epoch": 0.9846736045411542, "grad_norm": 2.2666783332824707, "learning_rate": 1.536659239025774e-05, "loss": 1.1656, "step": 1301 }, { "epoch": 0.9854304635761589, "grad_norm": 2.578672409057617, "learning_rate": 1.5359816574224626e-05, "loss": 1.2021, "step": 1302 }, { "epoch": 0.9861873226111637, "grad_norm": 2.1345741748809814, "learning_rate": 1.5353037303980075e-05, "loss": 1.2277, "step": 1303 }, { "epoch": 0.9869441816461684, "grad_norm": 2.1685898303985596, "learning_rate": 1.5346254583893895e-05, "loss": 1.206, "step": 1304 }, { "epoch": 0.9877010406811731, "grad_norm": 2.3150031566619873, "learning_rate": 1.533946841833813e-05, "loss": 1.1747, "step": 1305 }, { "epoch": 0.9884578997161778, "grad_norm": 2.3677496910095215, "learning_rate": 1.5332678811687034e-05, "loss": 1.2502, "step": 1306 }, { "epoch": 0.9892147587511826, "grad_norm": 2.0479371547698975, "learning_rate": 1.5325885768317085e-05, "loss": 1.129, "step": 1307 }, { "epoch": 0.9899716177861874, "grad_norm": 2.272096633911133, "learning_rate": 1.531908929260698e-05, "loss": 1.1515, "step": 1308 }, { "epoch": 0.990728476821192, "grad_norm": 2.233167886734009, "learning_rate": 1.5312289388937613e-05, "loss": 1.1576, "step": 1309 }, { "epoch": 0.9914853358561968, "grad_norm": 2.329028606414795, "learning_rate": 1.530548606169211e-05, "loss": 1.2331, "step": 1310 }, { "epoch": 0.9922421948912015, "grad_norm": 2.3802735805511475, "learning_rate": 1.5298679315255786e-05, "loss": 1.1576, "step": 1311 }, { "epoch": 0.9929990539262062, "grad_norm": 2.45041561126709, "learning_rate": 1.5291869154016167e-05, "loss": 1.2241, "step": 1312 }, { "epoch": 0.993755912961211, "grad_norm": 2.528601884841919, "learning_rate": 1.5285055582362975e-05, "loss": 1.2257, "step": 1313 }, { "epoch": 0.9945127719962157, "grad_norm": 2.2096829414367676, "learning_rate": 1.5278238604688143e-05, "loss": 1.1959, "step": 1314 }, { "epoch": 0.9952696310312205, "grad_norm": 2.218921184539795, "learning_rate": 1.5271418225385784e-05, "loss": 1.1444, "step": 1315 }, { "epoch": 0.9960264900662251, "grad_norm": 2.563999891281128, "learning_rate": 1.526459444885221e-05, "loss": 1.192, "step": 1316 }, { "epoch": 0.9967833491012299, "grad_norm": 2.6427245140075684, "learning_rate": 1.5257767279485934e-05, "loss": 1.1575, "step": 1317 }, { "epoch": 0.9975402081362347, "grad_norm": 2.0441269874572754, "learning_rate": 1.5250936721687628e-05, "loss": 1.1636, "step": 1318 }, { "epoch": 0.9982970671712393, "grad_norm": 2.560488700866699, "learning_rate": 1.5244102779860178e-05, "loss": 1.2198, "step": 1319 }, { "epoch": 0.9990539262062441, "grad_norm": 2.5017917156219482, "learning_rate": 1.5237265458408637e-05, "loss": 1.2215, "step": 1320 }, { "epoch": 0.9998107852412488, "grad_norm": 2.3458917140960693, "learning_rate": 1.5230424761740234e-05, "loss": 1.1645, "step": 1321 }, { "epoch": 1.0005676442762534, "grad_norm": 2.2212741374969482, "learning_rate": 1.5223580694264382e-05, "loss": 1.2054, "step": 1322 }, { "epoch": 1.0013245033112583, "grad_norm": 2.394789695739746, "learning_rate": 1.5216733260392658e-05, "loss": 1.1085, "step": 1323 }, { "epoch": 1.002081362346263, "grad_norm": 2.135875701904297, "learning_rate": 1.5209882464538817e-05, "loss": 1.1754, "step": 1324 }, { "epoch": 1.0028382213812677, "grad_norm": 2.4602649211883545, "learning_rate": 1.5203028311118778e-05, "loss": 1.1202, "step": 1325 }, { "epoch": 1.0035950804162725, "grad_norm": 2.5135326385498047, "learning_rate": 1.5196170804550618e-05, "loss": 1.147, "step": 1326 }, { "epoch": 1.0043519394512772, "grad_norm": 2.3376166820526123, "learning_rate": 1.518930994925459e-05, "loss": 1.1715, "step": 1327 }, { "epoch": 1.0051087984862819, "grad_norm": 2.2913684844970703, "learning_rate": 1.518244574965309e-05, "loss": 1.2037, "step": 1328 }, { "epoch": 1.0058656575212868, "grad_norm": 2.34970760345459, "learning_rate": 1.5175578210170678e-05, "loss": 1.155, "step": 1329 }, { "epoch": 1.0066225165562914, "grad_norm": 2.345945119857788, "learning_rate": 1.5168707335234067e-05, "loss": 1.1392, "step": 1330 }, { "epoch": 1.007379375591296, "grad_norm": 2.2599105834960938, "learning_rate": 1.5161833129272117e-05, "loss": 1.1402, "step": 1331 }, { "epoch": 1.0081362346263008, "grad_norm": 2.5355637073516846, "learning_rate": 1.5154955596715836e-05, "loss": 1.2212, "step": 1332 }, { "epoch": 1.0088930936613056, "grad_norm": 2.4323315620422363, "learning_rate": 1.5148074741998377e-05, "loss": 1.1289, "step": 1333 }, { "epoch": 1.0096499526963103, "grad_norm": 2.232952833175659, "learning_rate": 1.5141190569555033e-05, "loss": 1.1044, "step": 1334 }, { "epoch": 1.010406811731315, "grad_norm": 2.1613996028900146, "learning_rate": 1.513430308382324e-05, "loss": 1.1678, "step": 1335 }, { "epoch": 1.0111636707663199, "grad_norm": 2.369002342224121, "learning_rate": 1.5127412289242562e-05, "loss": 1.2219, "step": 1336 }, { "epoch": 1.0119205298013245, "grad_norm": 2.2522876262664795, "learning_rate": 1.51205181902547e-05, "loss": 1.1728, "step": 1337 }, { "epoch": 1.0126773888363292, "grad_norm": 2.172529935836792, "learning_rate": 1.5113620791303489e-05, "loss": 1.1599, "step": 1338 }, { "epoch": 1.013434247871334, "grad_norm": 2.265456199645996, "learning_rate": 1.5106720096834885e-05, "loss": 1.1496, "step": 1339 }, { "epoch": 1.0141911069063387, "grad_norm": 2.3640429973602295, "learning_rate": 1.5099816111296968e-05, "loss": 1.2001, "step": 1340 }, { "epoch": 1.0149479659413434, "grad_norm": 2.227107286453247, "learning_rate": 1.5092908839139948e-05, "loss": 1.1911, "step": 1341 }, { "epoch": 1.015704824976348, "grad_norm": 2.3177998065948486, "learning_rate": 1.5085998284816144e-05, "loss": 1.1233, "step": 1342 }, { "epoch": 1.016461684011353, "grad_norm": 2.167343854904175, "learning_rate": 1.507908445277999e-05, "loss": 1.2057, "step": 1343 }, { "epoch": 1.0172185430463576, "grad_norm": 2.2151575088500977, "learning_rate": 1.5072167347488042e-05, "loss": 1.1828, "step": 1344 }, { "epoch": 1.0179754020813623, "grad_norm": 2.031900405883789, "learning_rate": 1.5065246973398959e-05, "loss": 1.1408, "step": 1345 }, { "epoch": 1.0187322611163672, "grad_norm": 2.3186428546905518, "learning_rate": 1.5058323334973508e-05, "loss": 1.1698, "step": 1346 }, { "epoch": 1.0194891201513718, "grad_norm": 2.2243926525115967, "learning_rate": 1.5051396436674562e-05, "loss": 1.1983, "step": 1347 }, { "epoch": 1.0202459791863765, "grad_norm": 2.0334129333496094, "learning_rate": 1.5044466282967092e-05, "loss": 1.1257, "step": 1348 }, { "epoch": 1.0210028382213812, "grad_norm": 2.194042921066284, "learning_rate": 1.503753287831817e-05, "loss": 1.224, "step": 1349 }, { "epoch": 1.021759697256386, "grad_norm": 2.2667534351348877, "learning_rate": 1.5030596227196963e-05, "loss": 1.2042, "step": 1350 }, { "epoch": 1.0225165562913907, "grad_norm": 2.2039318084716797, "learning_rate": 1.5023656334074732e-05, "loss": 1.1965, "step": 1351 }, { "epoch": 1.0232734153263954, "grad_norm": 2.3508946895599365, "learning_rate": 1.5016713203424824e-05, "loss": 1.1718, "step": 1352 }, { "epoch": 1.0240302743614003, "grad_norm": 2.135310649871826, "learning_rate": 1.5009766839722679e-05, "loss": 1.1503, "step": 1353 }, { "epoch": 1.024787133396405, "grad_norm": 2.2958900928497314, "learning_rate": 1.5002817247445813e-05, "loss": 1.2141, "step": 1354 }, { "epoch": 1.0255439924314096, "grad_norm": 2.3174233436584473, "learning_rate": 1.4995864431073828e-05, "loss": 1.158, "step": 1355 }, { "epoch": 1.0263008514664145, "grad_norm": 2.1523966789245605, "learning_rate": 1.4988908395088405e-05, "loss": 1.1757, "step": 1356 }, { "epoch": 1.0270577105014191, "grad_norm": 2.2384963035583496, "learning_rate": 1.4981949143973297e-05, "loss": 1.1391, "step": 1357 }, { "epoch": 1.0278145695364238, "grad_norm": 2.1168923377990723, "learning_rate": 1.4974986682214332e-05, "loss": 1.1306, "step": 1358 }, { "epoch": 1.0285714285714285, "grad_norm": 2.392561435699463, "learning_rate": 1.4968021014299409e-05, "loss": 1.2224, "step": 1359 }, { "epoch": 1.0293282876064334, "grad_norm": 2.2257487773895264, "learning_rate": 1.4961052144718486e-05, "loss": 1.1284, "step": 1360 }, { "epoch": 1.030085146641438, "grad_norm": 2.0494847297668457, "learning_rate": 1.4954080077963596e-05, "loss": 1.1204, "step": 1361 }, { "epoch": 1.0308420056764427, "grad_norm": 2.2043280601501465, "learning_rate": 1.4947104818528822e-05, "loss": 1.2135, "step": 1362 }, { "epoch": 1.0315988647114476, "grad_norm": 2.1744041442871094, "learning_rate": 1.494012637091031e-05, "loss": 1.1533, "step": 1363 }, { "epoch": 1.0323557237464522, "grad_norm": 2.1696369647979736, "learning_rate": 1.4933144739606262e-05, "loss": 1.171, "step": 1364 }, { "epoch": 1.033112582781457, "grad_norm": 2.259871006011963, "learning_rate": 1.4926159929116934e-05, "loss": 1.1689, "step": 1365 }, { "epoch": 1.0338694418164618, "grad_norm": 2.3883163928985596, "learning_rate": 1.4919171943944628e-05, "loss": 1.1808, "step": 1366 }, { "epoch": 1.0346263008514665, "grad_norm": 2.4137635231018066, "learning_rate": 1.4912180788593686e-05, "loss": 1.2425, "step": 1367 }, { "epoch": 1.0353831598864711, "grad_norm": 2.2681548595428467, "learning_rate": 1.4905186467570509e-05, "loss": 1.1808, "step": 1368 }, { "epoch": 1.0361400189214758, "grad_norm": 2.2818410396575928, "learning_rate": 1.4898188985383522e-05, "loss": 1.1897, "step": 1369 }, { "epoch": 1.0368968779564807, "grad_norm": 2.2151215076446533, "learning_rate": 1.4891188346543201e-05, "loss": 1.136, "step": 1370 }, { "epoch": 1.0376537369914853, "grad_norm": 2.248666763305664, "learning_rate": 1.488418455556205e-05, "loss": 1.1758, "step": 1371 }, { "epoch": 1.03841059602649, "grad_norm": 2.2258033752441406, "learning_rate": 1.4877177616954602e-05, "loss": 1.1628, "step": 1372 }, { "epoch": 1.0391674550614949, "grad_norm": 2.117659091949463, "learning_rate": 1.4870167535237428e-05, "loss": 1.2191, "step": 1373 }, { "epoch": 1.0399243140964995, "grad_norm": 2.367983102798462, "learning_rate": 1.4863154314929114e-05, "loss": 1.1415, "step": 1374 }, { "epoch": 1.0406811731315042, "grad_norm": 2.134035587310791, "learning_rate": 1.4856137960550278e-05, "loss": 1.2032, "step": 1375 }, { "epoch": 1.0414380321665089, "grad_norm": 2.392430543899536, "learning_rate": 1.4849118476623556e-05, "loss": 1.197, "step": 1376 }, { "epoch": 1.0421948912015138, "grad_norm": 2.237036943435669, "learning_rate": 1.4842095867673603e-05, "loss": 1.168, "step": 1377 }, { "epoch": 1.0429517502365184, "grad_norm": 2.338472604751587, "learning_rate": 1.4835070138227077e-05, "loss": 1.1629, "step": 1378 }, { "epoch": 1.043708609271523, "grad_norm": 2.1505656242370605, "learning_rate": 1.4828041292812662e-05, "loss": 1.1559, "step": 1379 }, { "epoch": 1.044465468306528, "grad_norm": 2.155229330062866, "learning_rate": 1.4821009335961045e-05, "loss": 1.1477, "step": 1380 }, { "epoch": 1.0452223273415326, "grad_norm": 2.1831212043762207, "learning_rate": 1.4813974272204918e-05, "loss": 1.1486, "step": 1381 }, { "epoch": 1.0459791863765373, "grad_norm": 2.2904438972473145, "learning_rate": 1.4806936106078971e-05, "loss": 1.1605, "step": 1382 }, { "epoch": 1.0467360454115422, "grad_norm": 2.416222333908081, "learning_rate": 1.4799894842119906e-05, "loss": 1.1161, "step": 1383 }, { "epoch": 1.0474929044465469, "grad_norm": 2.2631683349609375, "learning_rate": 1.4792850484866408e-05, "loss": 1.173, "step": 1384 }, { "epoch": 1.0482497634815515, "grad_norm": 2.2983131408691406, "learning_rate": 1.4785803038859166e-05, "loss": 1.1584, "step": 1385 }, { "epoch": 1.0490066225165562, "grad_norm": 2.1680402755737305, "learning_rate": 1.4778752508640852e-05, "loss": 1.1689, "step": 1386 }, { "epoch": 1.049763481551561, "grad_norm": 2.161684036254883, "learning_rate": 1.4771698898756137e-05, "loss": 1.1772, "step": 1387 }, { "epoch": 1.0505203405865657, "grad_norm": 2.048295021057129, "learning_rate": 1.4764642213751664e-05, "loss": 1.1598, "step": 1388 }, { "epoch": 1.0512771996215704, "grad_norm": 2.0943684577941895, "learning_rate": 1.4757582458176067e-05, "loss": 1.1389, "step": 1389 }, { "epoch": 1.0520340586565753, "grad_norm": 2.5327534675598145, "learning_rate": 1.475051963657996e-05, "loss": 1.1496, "step": 1390 }, { "epoch": 1.05279091769158, "grad_norm": 2.1597166061401367, "learning_rate": 1.4743453753515924e-05, "loss": 1.1409, "step": 1391 }, { "epoch": 1.0535477767265846, "grad_norm": 2.877094268798828, "learning_rate": 1.4736384813538527e-05, "loss": 1.1359, "step": 1392 }, { "epoch": 1.0543046357615895, "grad_norm": 2.4092910289764404, "learning_rate": 1.472931282120429e-05, "loss": 1.1673, "step": 1393 }, { "epoch": 1.0550614947965942, "grad_norm": 2.26458740234375, "learning_rate": 1.4722237781071717e-05, "loss": 1.167, "step": 1394 }, { "epoch": 1.0558183538315988, "grad_norm": 2.2418289184570312, "learning_rate": 1.4715159697701276e-05, "loss": 1.1674, "step": 1395 }, { "epoch": 1.0565752128666035, "grad_norm": 2.948460102081299, "learning_rate": 1.470807857565538e-05, "loss": 1.1459, "step": 1396 }, { "epoch": 1.0573320719016084, "grad_norm": 2.1875085830688477, "learning_rate": 1.4700994419498423e-05, "loss": 1.1781, "step": 1397 }, { "epoch": 1.058088930936613, "grad_norm": 2.3909361362457275, "learning_rate": 1.4693907233796737e-05, "loss": 1.1346, "step": 1398 }, { "epoch": 1.0588457899716177, "grad_norm": 2.12752628326416, "learning_rate": 1.4686817023118619e-05, "loss": 1.1127, "step": 1399 }, { "epoch": 1.0596026490066226, "grad_norm": 2.3758580684661865, "learning_rate": 1.4679723792034304e-05, "loss": 1.1667, "step": 1400 }, { "epoch": 1.0603595080416273, "grad_norm": 2.23144793510437, "learning_rate": 1.4672627545115991e-05, "loss": 1.1693, "step": 1401 }, { "epoch": 1.061116367076632, "grad_norm": 2.2588181495666504, "learning_rate": 1.46655282869378e-05, "loss": 1.1555, "step": 1402 }, { "epoch": 1.0618732261116368, "grad_norm": 3.266263961791992, "learning_rate": 1.4658426022075816e-05, "loss": 1.1475, "step": 1403 }, { "epoch": 1.0626300851466415, "grad_norm": 2.344022750854492, "learning_rate": 1.4651320755108042e-05, "loss": 1.1151, "step": 1404 }, { "epoch": 1.0633869441816461, "grad_norm": 2.701164722442627, "learning_rate": 1.464421249061443e-05, "loss": 1.1547, "step": 1405 }, { "epoch": 1.0641438032166508, "grad_norm": 2.4534714221954346, "learning_rate": 1.4637101233176856e-05, "loss": 1.17, "step": 1406 }, { "epoch": 1.0649006622516557, "grad_norm": 2.413388252258301, "learning_rate": 1.462998698737913e-05, "loss": 1.1852, "step": 1407 }, { "epoch": 1.0656575212866604, "grad_norm": 2.2986247539520264, "learning_rate": 1.4622869757806983e-05, "loss": 1.1544, "step": 1408 }, { "epoch": 1.066414380321665, "grad_norm": 2.308239221572876, "learning_rate": 1.4615749549048076e-05, "loss": 1.1572, "step": 1409 }, { "epoch": 1.06717123935667, "grad_norm": 2.270495653152466, "learning_rate": 1.4608626365691986e-05, "loss": 1.1625, "step": 1410 }, { "epoch": 1.0679280983916746, "grad_norm": 2.0776920318603516, "learning_rate": 1.4601500212330213e-05, "loss": 1.1879, "step": 1411 }, { "epoch": 1.0686849574266792, "grad_norm": 2.279533863067627, "learning_rate": 1.4594371093556159e-05, "loss": 1.1844, "step": 1412 }, { "epoch": 1.069441816461684, "grad_norm": 2.263552188873291, "learning_rate": 1.4587239013965149e-05, "loss": 1.1192, "step": 1413 }, { "epoch": 1.0701986754966888, "grad_norm": 2.1875579357147217, "learning_rate": 1.4580103978154414e-05, "loss": 1.1921, "step": 1414 }, { "epoch": 1.0709555345316935, "grad_norm": 2.553298234939575, "learning_rate": 1.4572965990723083e-05, "loss": 1.1307, "step": 1415 }, { "epoch": 1.0717123935666981, "grad_norm": 2.0610175132751465, "learning_rate": 1.4565825056272199e-05, "loss": 1.2057, "step": 1416 }, { "epoch": 1.072469252601703, "grad_norm": 3.3085532188415527, "learning_rate": 1.4558681179404704e-05, "loss": 1.1566, "step": 1417 }, { "epoch": 1.0732261116367077, "grad_norm": 2.1448001861572266, "learning_rate": 1.4551534364725422e-05, "loss": 1.1958, "step": 1418 }, { "epoch": 1.0739829706717123, "grad_norm": 2.5602312088012695, "learning_rate": 1.4544384616841084e-05, "loss": 1.1513, "step": 1419 }, { "epoch": 1.0747398297067172, "grad_norm": 2.34245228767395, "learning_rate": 1.4537231940360315e-05, "loss": 1.1331, "step": 1420 }, { "epoch": 1.0754966887417219, "grad_norm": 2.5702669620513916, "learning_rate": 1.4530076339893615e-05, "loss": 1.1369, "step": 1421 }, { "epoch": 1.0762535477767265, "grad_norm": 2.3624837398529053, "learning_rate": 1.4522917820053375e-05, "loss": 1.1618, "step": 1422 }, { "epoch": 1.0770104068117312, "grad_norm": 2.3341963291168213, "learning_rate": 1.4515756385453868e-05, "loss": 1.1688, "step": 1423 }, { "epoch": 1.077767265846736, "grad_norm": 2.32336688041687, "learning_rate": 1.4508592040711246e-05, "loss": 1.136, "step": 1424 }, { "epoch": 1.0785241248817408, "grad_norm": 2.4956133365631104, "learning_rate": 1.4501424790443544e-05, "loss": 1.1893, "step": 1425 }, { "epoch": 1.0792809839167454, "grad_norm": 2.5766842365264893, "learning_rate": 1.4494254639270646e-05, "loss": 1.104, "step": 1426 }, { "epoch": 1.0800378429517503, "grad_norm": 2.3494839668273926, "learning_rate": 1.4487081591814336e-05, "loss": 1.1509, "step": 1427 }, { "epoch": 1.080794701986755, "grad_norm": 2.25639009475708, "learning_rate": 1.4479905652698248e-05, "loss": 1.1518, "step": 1428 }, { "epoch": 1.0815515610217596, "grad_norm": 2.454833984375, "learning_rate": 1.4472726826547876e-05, "loss": 1.2164, "step": 1429 }, { "epoch": 1.0823084200567645, "grad_norm": 2.320312976837158, "learning_rate": 1.4465545117990587e-05, "loss": 1.1562, "step": 1430 }, { "epoch": 1.0830652790917692, "grad_norm": 2.136070966720581, "learning_rate": 1.4458360531655606e-05, "loss": 1.1621, "step": 1431 }, { "epoch": 1.0838221381267739, "grad_norm": 2.3895716667175293, "learning_rate": 1.4451173072173996e-05, "loss": 1.1442, "step": 1432 }, { "epoch": 1.0845789971617785, "grad_norm": 2.185600757598877, "learning_rate": 1.4443982744178694e-05, "loss": 1.1352, "step": 1433 }, { "epoch": 1.0853358561967834, "grad_norm": 2.2408831119537354, "learning_rate": 1.4436789552304471e-05, "loss": 1.1771, "step": 1434 }, { "epoch": 1.086092715231788, "grad_norm": 2.2491331100463867, "learning_rate": 1.4429593501187952e-05, "loss": 1.1509, "step": 1435 }, { "epoch": 1.0868495742667927, "grad_norm": 2.4954214096069336, "learning_rate": 1.4422394595467597e-05, "loss": 1.1333, "step": 1436 }, { "epoch": 1.0876064333017976, "grad_norm": 2.427107572555542, "learning_rate": 1.4415192839783716e-05, "loss": 1.1806, "step": 1437 }, { "epoch": 1.0883632923368023, "grad_norm": 5.063598155975342, "learning_rate": 1.4407988238778448e-05, "loss": 1.1364, "step": 1438 }, { "epoch": 1.089120151371807, "grad_norm": 2.261101245880127, "learning_rate": 1.4400780797095769e-05, "loss": 1.171, "step": 1439 }, { "epoch": 1.0898770104068118, "grad_norm": 2.3400819301605225, "learning_rate": 1.4393570519381484e-05, "loss": 1.1354, "step": 1440 }, { "epoch": 1.0906338694418165, "grad_norm": 2.225931167602539, "learning_rate": 1.438635741028323e-05, "loss": 1.1674, "step": 1441 }, { "epoch": 1.0913907284768212, "grad_norm": 2.3904130458831787, "learning_rate": 1.437914147445047e-05, "loss": 1.1513, "step": 1442 }, { "epoch": 1.0921475875118258, "grad_norm": 2.4583778381347656, "learning_rate": 1.4371922716534483e-05, "loss": 1.1708, "step": 1443 }, { "epoch": 1.0929044465468307, "grad_norm": 2.270364761352539, "learning_rate": 1.436470114118837e-05, "loss": 1.1708, "step": 1444 }, { "epoch": 1.0936613055818354, "grad_norm": 2.190642833709717, "learning_rate": 1.4357476753067053e-05, "loss": 1.123, "step": 1445 }, { "epoch": 1.09441816461684, "grad_norm": 2.3521080017089844, "learning_rate": 1.4350249556827256e-05, "loss": 1.1793, "step": 1446 }, { "epoch": 1.095175023651845, "grad_norm": 2.4334235191345215, "learning_rate": 1.4343019557127522e-05, "loss": 1.175, "step": 1447 }, { "epoch": 1.0959318826868496, "grad_norm": 2.2919211387634277, "learning_rate": 1.4335786758628199e-05, "loss": 1.1601, "step": 1448 }, { "epoch": 1.0966887417218543, "grad_norm": 2.603358745574951, "learning_rate": 1.4328551165991435e-05, "loss": 1.1966, "step": 1449 }, { "epoch": 1.097445600756859, "grad_norm": 2.47110915184021, "learning_rate": 1.4321312783881192e-05, "loss": 1.1623, "step": 1450 }, { "epoch": 1.0982024597918638, "grad_norm": 2.54114031791687, "learning_rate": 1.431407161696321e-05, "loss": 1.1598, "step": 1451 }, { "epoch": 1.0989593188268685, "grad_norm": 2.1958112716674805, "learning_rate": 1.4306827669905041e-05, "loss": 1.1317, "step": 1452 }, { "epoch": 1.0997161778618731, "grad_norm": 2.2989349365234375, "learning_rate": 1.4299580947376022e-05, "loss": 1.1036, "step": 1453 }, { "epoch": 1.100473036896878, "grad_norm": 2.331904411315918, "learning_rate": 1.4292331454047278e-05, "loss": 1.1331, "step": 1454 }, { "epoch": 1.1012298959318827, "grad_norm": 2.376122236251831, "learning_rate": 1.4285079194591722e-05, "loss": 1.1649, "step": 1455 }, { "epoch": 1.1019867549668874, "grad_norm": 2.453084945678711, "learning_rate": 1.4277824173684056e-05, "loss": 1.1636, "step": 1456 }, { "epoch": 1.1027436140018922, "grad_norm": 2.4421586990356445, "learning_rate": 1.4270566396000744e-05, "loss": 1.1323, "step": 1457 }, { "epoch": 1.103500473036897, "grad_norm": 2.308035135269165, "learning_rate": 1.426330586622005e-05, "loss": 1.13, "step": 1458 }, { "epoch": 1.1042573320719016, "grad_norm": 2.403162956237793, "learning_rate": 1.4256042589021994e-05, "loss": 1.181, "step": 1459 }, { "epoch": 1.1050141911069062, "grad_norm": 2.4109246730804443, "learning_rate": 1.4248776569088377e-05, "loss": 1.1597, "step": 1460 }, { "epoch": 1.1057710501419111, "grad_norm": 2.412398099899292, "learning_rate": 1.4241507811102762e-05, "loss": 1.118, "step": 1461 }, { "epoch": 1.1065279091769158, "grad_norm": 2.8465075492858887, "learning_rate": 1.4234236319750482e-05, "loss": 1.1618, "step": 1462 }, { "epoch": 1.1072847682119205, "grad_norm": 2.306621789932251, "learning_rate": 1.4226962099718628e-05, "loss": 1.2062, "step": 1463 }, { "epoch": 1.1080416272469253, "grad_norm": 2.6074671745300293, "learning_rate": 1.4219685155696053e-05, "loss": 1.1676, "step": 1464 }, { "epoch": 1.10879848628193, "grad_norm": 2.509995460510254, "learning_rate": 1.421240549237336e-05, "loss": 1.1771, "step": 1465 }, { "epoch": 1.1095553453169347, "grad_norm": 2.535238742828369, "learning_rate": 1.4205123114442916e-05, "loss": 1.1682, "step": 1466 }, { "epoch": 1.1103122043519393, "grad_norm": 2.4258975982666016, "learning_rate": 1.4197838026598826e-05, "loss": 1.0947, "step": 1467 }, { "epoch": 1.1110690633869442, "grad_norm": 2.5997817516326904, "learning_rate": 1.4190550233536946e-05, "loss": 1.1471, "step": 1468 }, { "epoch": 1.1118259224219489, "grad_norm": 2.358372449874878, "learning_rate": 1.4183259739954877e-05, "loss": 1.1564, "step": 1469 }, { "epoch": 1.1125827814569536, "grad_norm": 2.323791027069092, "learning_rate": 1.4175966550551963e-05, "loss": 1.1936, "step": 1470 }, { "epoch": 1.1133396404919584, "grad_norm": 2.334627151489258, "learning_rate": 1.4168670670029277e-05, "loss": 1.1514, "step": 1471 }, { "epoch": 1.114096499526963, "grad_norm": 2.2344837188720703, "learning_rate": 1.4161372103089637e-05, "loss": 1.1378, "step": 1472 }, { "epoch": 1.1148533585619678, "grad_norm": 2.200742483139038, "learning_rate": 1.4154070854437587e-05, "loss": 1.1783, "step": 1473 }, { "epoch": 1.1156102175969727, "grad_norm": 2.2466723918914795, "learning_rate": 1.4146766928779396e-05, "loss": 1.1419, "step": 1474 }, { "epoch": 1.1163670766319773, "grad_norm": 2.4173378944396973, "learning_rate": 1.4139460330823071e-05, "loss": 1.0991, "step": 1475 }, { "epoch": 1.117123935666982, "grad_norm": 2.3149657249450684, "learning_rate": 1.413215106527833e-05, "loss": 1.1419, "step": 1476 }, { "epoch": 1.1178807947019869, "grad_norm": 2.2564306259155273, "learning_rate": 1.4124839136856612e-05, "loss": 1.1693, "step": 1477 }, { "epoch": 1.1186376537369915, "grad_norm": 2.131028652191162, "learning_rate": 1.4117524550271077e-05, "loss": 1.158, "step": 1478 }, { "epoch": 1.1193945127719962, "grad_norm": 2.4710068702697754, "learning_rate": 1.4110207310236595e-05, "loss": 1.1934, "step": 1479 }, { "epoch": 1.1201513718070009, "grad_norm": 2.138939380645752, "learning_rate": 1.4102887421469747e-05, "loss": 1.1196, "step": 1480 }, { "epoch": 1.1209082308420057, "grad_norm": 2.542495012283325, "learning_rate": 1.4095564888688822e-05, "loss": 1.1693, "step": 1481 }, { "epoch": 1.1216650898770104, "grad_norm": 2.4574832916259766, "learning_rate": 1.4088239716613816e-05, "loss": 1.1248, "step": 1482 }, { "epoch": 1.122421948912015, "grad_norm": 2.133028268814087, "learning_rate": 1.4080911909966419e-05, "loss": 1.2361, "step": 1483 }, { "epoch": 1.12317880794702, "grad_norm": 2.624393939971924, "learning_rate": 1.4073581473470023e-05, "loss": 1.1053, "step": 1484 }, { "epoch": 1.1239356669820246, "grad_norm": 2.0480175018310547, "learning_rate": 1.4066248411849717e-05, "loss": 1.1364, "step": 1485 }, { "epoch": 1.1246925260170293, "grad_norm": 2.2111339569091797, "learning_rate": 1.4058912729832286e-05, "loss": 1.1869, "step": 1486 }, { "epoch": 1.125449385052034, "grad_norm": 2.4910013675689697, "learning_rate": 1.4051574432146191e-05, "loss": 1.16, "step": 1487 }, { "epoch": 1.1262062440870388, "grad_norm": 2.419105052947998, "learning_rate": 1.4044233523521587e-05, "loss": 1.1637, "step": 1488 }, { "epoch": 1.1269631031220435, "grad_norm": 2.4131598472595215, "learning_rate": 1.4036890008690316e-05, "loss": 1.1814, "step": 1489 }, { "epoch": 1.1277199621570482, "grad_norm": 2.395854949951172, "learning_rate": 1.4029543892385898e-05, "loss": 1.1535, "step": 1490 }, { "epoch": 1.128476821192053, "grad_norm": 2.0963070392608643, "learning_rate": 1.4022195179343518e-05, "loss": 1.1366, "step": 1491 }, { "epoch": 1.1292336802270577, "grad_norm": 2.267829418182373, "learning_rate": 1.4014843874300052e-05, "loss": 1.1393, "step": 1492 }, { "epoch": 1.1299905392620624, "grad_norm": 2.1519582271575928, "learning_rate": 1.4007489981994038e-05, "loss": 1.1728, "step": 1493 }, { "epoch": 1.1307473982970673, "grad_norm": 2.194342613220215, "learning_rate": 1.4000133507165684e-05, "loss": 1.1586, "step": 1494 }, { "epoch": 1.131504257332072, "grad_norm": 2.3476803302764893, "learning_rate": 1.3992774454556855e-05, "loss": 1.2297, "step": 1495 }, { "epoch": 1.1322611163670766, "grad_norm": 2.1007235050201416, "learning_rate": 1.3985412828911088e-05, "loss": 1.1605, "step": 1496 }, { "epoch": 1.1330179754020813, "grad_norm": 2.512786388397217, "learning_rate": 1.397804863497358e-05, "loss": 1.1765, "step": 1497 }, { "epoch": 1.1337748344370862, "grad_norm": 2.1948659420013428, "learning_rate": 1.397068187749117e-05, "loss": 1.1912, "step": 1498 }, { "epoch": 1.1345316934720908, "grad_norm": 2.348325729370117, "learning_rate": 1.3963312561212359e-05, "loss": 1.152, "step": 1499 }, { "epoch": 1.1352885525070955, "grad_norm": 2.088045597076416, "learning_rate": 1.3955940690887301e-05, "loss": 1.0803, "step": 1500 }, { "epoch": 1.1360454115421004, "grad_norm": 2.110816240310669, "learning_rate": 1.3948566271267784e-05, "loss": 1.1599, "step": 1501 }, { "epoch": 1.136802270577105, "grad_norm": 2.306739330291748, "learning_rate": 1.3941189307107255e-05, "loss": 1.2, "step": 1502 }, { "epoch": 1.1375591296121097, "grad_norm": 2.494978666305542, "learning_rate": 1.3933809803160784e-05, "loss": 1.1418, "step": 1503 }, { "epoch": 1.1383159886471144, "grad_norm": 2.4510955810546875, "learning_rate": 1.3926427764185093e-05, "loss": 1.1455, "step": 1504 }, { "epoch": 1.1390728476821192, "grad_norm": 2.415323495864868, "learning_rate": 1.3919043194938528e-05, "loss": 1.1361, "step": 1505 }, { "epoch": 1.139829706717124, "grad_norm": 2.263831615447998, "learning_rate": 1.391165610018107e-05, "loss": 1.1087, "step": 1506 }, { "epoch": 1.1405865657521286, "grad_norm": 2.5898752212524414, "learning_rate": 1.3904266484674331e-05, "loss": 1.1339, "step": 1507 }, { "epoch": 1.1413434247871335, "grad_norm": 2.153635263442993, "learning_rate": 1.3896874353181542e-05, "loss": 1.1024, "step": 1508 }, { "epoch": 1.1421002838221381, "grad_norm": 2.095327138900757, "learning_rate": 1.3889479710467557e-05, "loss": 1.2094, "step": 1509 }, { "epoch": 1.1428571428571428, "grad_norm": 2.1070072650909424, "learning_rate": 1.388208256129885e-05, "loss": 1.143, "step": 1510 }, { "epoch": 1.1436140018921477, "grad_norm": 2.2180447578430176, "learning_rate": 1.3874682910443516e-05, "loss": 1.1682, "step": 1511 }, { "epoch": 1.1443708609271523, "grad_norm": 2.3871870040893555, "learning_rate": 1.3867280762671246e-05, "loss": 1.1671, "step": 1512 }, { "epoch": 1.145127719962157, "grad_norm": 2.165802001953125, "learning_rate": 1.3859876122753363e-05, "loss": 1.1138, "step": 1513 }, { "epoch": 1.145884578997162, "grad_norm": 2.162033796310425, "learning_rate": 1.3852468995462785e-05, "loss": 1.1719, "step": 1514 }, { "epoch": 1.1466414380321666, "grad_norm": 2.163429021835327, "learning_rate": 1.3845059385574023e-05, "loss": 1.1483, "step": 1515 }, { "epoch": 1.1473982970671712, "grad_norm": 2.177055597305298, "learning_rate": 1.3837647297863203e-05, "loss": 1.1372, "step": 1516 }, { "epoch": 1.1481551561021759, "grad_norm": 2.3266491889953613, "learning_rate": 1.383023273710805e-05, "loss": 1.1825, "step": 1517 }, { "epoch": 1.1489120151371808, "grad_norm": 2.4812657833099365, "learning_rate": 1.3822815708087865e-05, "loss": 1.1697, "step": 1518 }, { "epoch": 1.1496688741721854, "grad_norm": 2.1462526321411133, "learning_rate": 1.3815396215583564e-05, "loss": 1.1203, "step": 1519 }, { "epoch": 1.15042573320719, "grad_norm": 2.160487174987793, "learning_rate": 1.3807974264377629e-05, "loss": 1.1322, "step": 1520 }, { "epoch": 1.1511825922421948, "grad_norm": 2.234320640563965, "learning_rate": 1.3800549859254144e-05, "loss": 1.1393, "step": 1521 }, { "epoch": 1.1519394512771997, "grad_norm": 2.4396426677703857, "learning_rate": 1.3793123004998765e-05, "loss": 1.1671, "step": 1522 }, { "epoch": 1.1526963103122043, "grad_norm": 2.3154118061065674, "learning_rate": 1.3785693706398724e-05, "loss": 1.1867, "step": 1523 }, { "epoch": 1.153453169347209, "grad_norm": 2.2119319438934326, "learning_rate": 1.377826196824284e-05, "loss": 1.1582, "step": 1524 }, { "epoch": 1.1542100283822139, "grad_norm": 2.2084405422210693, "learning_rate": 1.3770827795321495e-05, "loss": 1.1613, "step": 1525 }, { "epoch": 1.1549668874172185, "grad_norm": 2.341912031173706, "learning_rate": 1.3763391192426644e-05, "loss": 1.1519, "step": 1526 }, { "epoch": 1.1557237464522232, "grad_norm": 2.2736034393310547, "learning_rate": 1.3755952164351814e-05, "loss": 1.1465, "step": 1527 }, { "epoch": 1.156480605487228, "grad_norm": 2.2359468936920166, "learning_rate": 1.3748510715892075e-05, "loss": 1.193, "step": 1528 }, { "epoch": 1.1572374645222328, "grad_norm": 1.9430551528930664, "learning_rate": 1.3741066851844082e-05, "loss": 1.139, "step": 1529 }, { "epoch": 1.1579943235572374, "grad_norm": 2.0962564945220947, "learning_rate": 1.3733620577006035e-05, "loss": 1.1442, "step": 1530 }, { "epoch": 1.1587511825922423, "grad_norm": 2.0686581134796143, "learning_rate": 1.3726171896177687e-05, "loss": 1.1778, "step": 1531 }, { "epoch": 1.159508041627247, "grad_norm": 2.120643138885498, "learning_rate": 1.3718720814160342e-05, "loss": 1.1789, "step": 1532 }, { "epoch": 1.1602649006622516, "grad_norm": 2.4168310165405273, "learning_rate": 1.3711267335756862e-05, "loss": 1.1816, "step": 1533 }, { "epoch": 1.1610217596972563, "grad_norm": 2.183661460876465, "learning_rate": 1.3703811465771636e-05, "loss": 1.1861, "step": 1534 }, { "epoch": 1.1617786187322612, "grad_norm": 2.196077346801758, "learning_rate": 1.3696353209010609e-05, "loss": 1.1845, "step": 1535 }, { "epoch": 1.1625354777672658, "grad_norm": 2.280958890914917, "learning_rate": 1.3688892570281261e-05, "loss": 1.1371, "step": 1536 }, { "epoch": 1.1632923368022705, "grad_norm": 2.3048434257507324, "learning_rate": 1.3681429554392602e-05, "loss": 1.15, "step": 1537 }, { "epoch": 1.1640491958372754, "grad_norm": 2.177098512649536, "learning_rate": 1.367396416615518e-05, "loss": 1.1537, "step": 1538 }, { "epoch": 1.16480605487228, "grad_norm": 2.419185161590576, "learning_rate": 1.3666496410381072e-05, "loss": 1.1634, "step": 1539 }, { "epoch": 1.1655629139072847, "grad_norm": 2.5214250087738037, "learning_rate": 1.3659026291883874e-05, "loss": 1.1669, "step": 1540 }, { "epoch": 1.1663197729422894, "grad_norm": 2.3618457317352295, "learning_rate": 1.365155381547872e-05, "loss": 1.2169, "step": 1541 }, { "epoch": 1.1670766319772943, "grad_norm": 2.3737759590148926, "learning_rate": 1.3644078985982243e-05, "loss": 1.2004, "step": 1542 }, { "epoch": 1.167833491012299, "grad_norm": 2.500761032104492, "learning_rate": 1.3636601808212613e-05, "loss": 1.1576, "step": 1543 }, { "epoch": 1.1685903500473036, "grad_norm": 2.429725170135498, "learning_rate": 1.36291222869895e-05, "loss": 1.1412, "step": 1544 }, { "epoch": 1.1693472090823085, "grad_norm": 2.4820287227630615, "learning_rate": 1.3621640427134095e-05, "loss": 1.1256, "step": 1545 }, { "epoch": 1.1701040681173132, "grad_norm": 2.5075745582580566, "learning_rate": 1.3614156233469081e-05, "loss": 1.1426, "step": 1546 }, { "epoch": 1.1708609271523178, "grad_norm": 2.5569803714752197, "learning_rate": 1.3606669710818665e-05, "loss": 1.1438, "step": 1547 }, { "epoch": 1.1716177861873227, "grad_norm": 2.4151296615600586, "learning_rate": 1.3599180864008538e-05, "loss": 1.1325, "step": 1548 }, { "epoch": 1.1723746452223274, "grad_norm": 2.1855361461639404, "learning_rate": 1.3591689697865902e-05, "loss": 1.1328, "step": 1549 }, { "epoch": 1.173131504257332, "grad_norm": 2.130683422088623, "learning_rate": 1.3584196217219443e-05, "loss": 1.1411, "step": 1550 }, { "epoch": 1.173888363292337, "grad_norm": 2.3407418727874756, "learning_rate": 1.357670042689935e-05, "loss": 1.1396, "step": 1551 }, { "epoch": 1.1746452223273416, "grad_norm": 2.5837795734405518, "learning_rate": 1.3569202331737292e-05, "loss": 1.1858, "step": 1552 }, { "epoch": 1.1754020813623463, "grad_norm": 2.2978811264038086, "learning_rate": 1.3561701936566426e-05, "loss": 1.1712, "step": 1553 }, { "epoch": 1.176158940397351, "grad_norm": 2.5813682079315186, "learning_rate": 1.355419924622139e-05, "loss": 1.1282, "step": 1554 }, { "epoch": 1.1769157994323558, "grad_norm": 2.0672824382781982, "learning_rate": 1.3546694265538316e-05, "loss": 1.1639, "step": 1555 }, { "epoch": 1.1776726584673605, "grad_norm": 2.138291358947754, "learning_rate": 1.3539186999354785e-05, "loss": 1.1583, "step": 1556 }, { "epoch": 1.1784295175023651, "grad_norm": 2.2805378437042236, "learning_rate": 1.3531677452509873e-05, "loss": 1.1315, "step": 1557 }, { "epoch": 1.1791863765373698, "grad_norm": 2.4369373321533203, "learning_rate": 1.3524165629844124e-05, "loss": 1.1395, "step": 1558 }, { "epoch": 1.1799432355723747, "grad_norm": 2.610330820083618, "learning_rate": 1.3516651536199536e-05, "loss": 1.1534, "step": 1559 }, { "epoch": 1.1807000946073793, "grad_norm": 2.1532680988311768, "learning_rate": 1.3509135176419583e-05, "loss": 1.1266, "step": 1560 }, { "epoch": 1.181456953642384, "grad_norm": 2.269569158554077, "learning_rate": 1.3501616555349195e-05, "loss": 1.1962, "step": 1561 }, { "epoch": 1.182213812677389, "grad_norm": 2.3179900646209717, "learning_rate": 1.3494095677834762e-05, "loss": 1.1554, "step": 1562 }, { "epoch": 1.1829706717123936, "grad_norm": 2.116596221923828, "learning_rate": 1.3486572548724126e-05, "loss": 1.124, "step": 1563 }, { "epoch": 1.1837275307473982, "grad_norm": 2.4712612628936768, "learning_rate": 1.347904717286658e-05, "loss": 1.1336, "step": 1564 }, { "epoch": 1.1844843897824031, "grad_norm": 2.1904261112213135, "learning_rate": 1.3471519555112866e-05, "loss": 1.1613, "step": 1565 }, { "epoch": 1.1852412488174078, "grad_norm": 2.235826253890991, "learning_rate": 1.3463989700315179e-05, "loss": 1.1404, "step": 1566 }, { "epoch": 1.1859981078524124, "grad_norm": 2.2662901878356934, "learning_rate": 1.3456457613327136e-05, "loss": 1.1985, "step": 1567 }, { "epoch": 1.1867549668874173, "grad_norm": 2.3666486740112305, "learning_rate": 1.3448923299003815e-05, "loss": 1.125, "step": 1568 }, { "epoch": 1.187511825922422, "grad_norm": 2.1387600898742676, "learning_rate": 1.344138676220172e-05, "loss": 1.157, "step": 1569 }, { "epoch": 1.1882686849574267, "grad_norm": 2.4021949768066406, "learning_rate": 1.3433848007778783e-05, "loss": 1.1628, "step": 1570 }, { "epoch": 1.1890255439924313, "grad_norm": 2.100867986679077, "learning_rate": 1.3426307040594372e-05, "loss": 1.1712, "step": 1571 }, { "epoch": 1.1897824030274362, "grad_norm": 2.6818182468414307, "learning_rate": 1.3418763865509283e-05, "loss": 1.1505, "step": 1572 }, { "epoch": 1.1905392620624409, "grad_norm": 2.2335827350616455, "learning_rate": 1.3411218487385725e-05, "loss": 1.1367, "step": 1573 }, { "epoch": 1.1912961210974455, "grad_norm": 2.332047939300537, "learning_rate": 1.3403670911087339e-05, "loss": 1.1186, "step": 1574 }, { "epoch": 1.1920529801324504, "grad_norm": 2.119150400161743, "learning_rate": 1.339612114147917e-05, "loss": 1.1748, "step": 1575 }, { "epoch": 1.192809839167455, "grad_norm": 2.413939952850342, "learning_rate": 1.3388569183427695e-05, "loss": 1.178, "step": 1576 }, { "epoch": 1.1935666982024598, "grad_norm": 2.265653371810913, "learning_rate": 1.3381015041800787e-05, "loss": 1.1532, "step": 1577 }, { "epoch": 1.1943235572374644, "grad_norm": 2.1941394805908203, "learning_rate": 1.3373458721467724e-05, "loss": 1.1027, "step": 1578 }, { "epoch": 1.1950804162724693, "grad_norm": 2.350780725479126, "learning_rate": 1.3365900227299205e-05, "loss": 1.1373, "step": 1579 }, { "epoch": 1.195837275307474, "grad_norm": 2.401061773300171, "learning_rate": 1.3358339564167313e-05, "loss": 1.1602, "step": 1580 }, { "epoch": 1.1965941343424786, "grad_norm": 2.3053834438323975, "learning_rate": 1.3350776736945539e-05, "loss": 1.0973, "step": 1581 }, { "epoch": 1.1973509933774835, "grad_norm": 2.3348872661590576, "learning_rate": 1.3343211750508769e-05, "loss": 1.1439, "step": 1582 }, { "epoch": 1.1981078524124882, "grad_norm": 2.255254030227661, "learning_rate": 1.333564460973327e-05, "loss": 1.1259, "step": 1583 }, { "epoch": 1.1988647114474928, "grad_norm": 2.1117663383483887, "learning_rate": 1.332807531949671e-05, "loss": 1.1075, "step": 1584 }, { "epoch": 1.1996215704824977, "grad_norm": 2.2909741401672363, "learning_rate": 1.3320503884678141e-05, "loss": 1.1518, "step": 1585 }, { "epoch": 1.2003784295175024, "grad_norm": 2.2066426277160645, "learning_rate": 1.331293031015799e-05, "loss": 1.1617, "step": 1586 }, { "epoch": 1.201135288552507, "grad_norm": 2.2523305416107178, "learning_rate": 1.3305354600818068e-05, "loss": 1.1961, "step": 1587 }, { "epoch": 1.201892147587512, "grad_norm": 2.1978890895843506, "learning_rate": 1.3297776761541566e-05, "loss": 1.154, "step": 1588 }, { "epoch": 1.2026490066225166, "grad_norm": 2.333961009979248, "learning_rate": 1.3290196797213037e-05, "loss": 1.1201, "step": 1589 }, { "epoch": 1.2034058656575213, "grad_norm": 2.1890499591827393, "learning_rate": 1.3282614712718412e-05, "loss": 1.1166, "step": 1590 }, { "epoch": 1.204162724692526, "grad_norm": 2.2715249061584473, "learning_rate": 1.3275030512944995e-05, "loss": 1.1702, "step": 1591 }, { "epoch": 1.2049195837275308, "grad_norm": 2.378854513168335, "learning_rate": 1.3267444202781434e-05, "loss": 1.1674, "step": 1592 }, { "epoch": 1.2056764427625355, "grad_norm": 2.085010290145874, "learning_rate": 1.3259855787117758e-05, "loss": 1.1709, "step": 1593 }, { "epoch": 1.2064333017975402, "grad_norm": 2.3028149604797363, "learning_rate": 1.3252265270845339e-05, "loss": 1.1304, "step": 1594 }, { "epoch": 1.2071901608325448, "grad_norm": 2.0950684547424316, "learning_rate": 1.3244672658856908e-05, "loss": 1.1585, "step": 1595 }, { "epoch": 1.2079470198675497, "grad_norm": 2.2300803661346436, "learning_rate": 1.3237077956046551e-05, "loss": 1.1123, "step": 1596 }, { "epoch": 1.2087038789025544, "grad_norm": 2.1364376544952393, "learning_rate": 1.3229481167309692e-05, "loss": 1.1112, "step": 1597 }, { "epoch": 1.209460737937559, "grad_norm": 2.9876246452331543, "learning_rate": 1.322188229754311e-05, "loss": 1.1989, "step": 1598 }, { "epoch": 1.210217596972564, "grad_norm": 2.1434969902038574, "learning_rate": 1.3214281351644918e-05, "loss": 1.1665, "step": 1599 }, { "epoch": 1.2109744560075686, "grad_norm": 2.142533779144287, "learning_rate": 1.3206678334514571e-05, "loss": 1.1229, "step": 1600 }, { "epoch": 1.2117313150425733, "grad_norm": 2.065274715423584, "learning_rate": 1.3199073251052854e-05, "loss": 1.1167, "step": 1601 }, { "epoch": 1.2124881740775781, "grad_norm": 2.128526449203491, "learning_rate": 1.3191466106161893e-05, "loss": 1.1622, "step": 1602 }, { "epoch": 1.2132450331125828, "grad_norm": 2.075362205505371, "learning_rate": 1.3183856904745135e-05, "loss": 1.1541, "step": 1603 }, { "epoch": 1.2140018921475875, "grad_norm": 2.4913156032562256, "learning_rate": 1.3176245651707357e-05, "loss": 1.1635, "step": 1604 }, { "epoch": 1.2147587511825924, "grad_norm": 2.1509463787078857, "learning_rate": 1.3168632351954653e-05, "loss": 1.1317, "step": 1605 }, { "epoch": 1.215515610217597, "grad_norm": 2.2484796047210693, "learning_rate": 1.3161017010394444e-05, "loss": 1.1342, "step": 1606 }, { "epoch": 1.2162724692526017, "grad_norm": 2.2622358798980713, "learning_rate": 1.3153399631935463e-05, "loss": 1.1416, "step": 1607 }, { "epoch": 1.2170293282876063, "grad_norm": 2.4243550300598145, "learning_rate": 1.3145780221487754e-05, "loss": 1.1653, "step": 1608 }, { "epoch": 1.2177861873226112, "grad_norm": 2.211627960205078, "learning_rate": 1.3138158783962668e-05, "loss": 1.177, "step": 1609 }, { "epoch": 1.218543046357616, "grad_norm": 2.025865316390991, "learning_rate": 1.3130535324272884e-05, "loss": 1.1536, "step": 1610 }, { "epoch": 1.2192999053926206, "grad_norm": 2.297100782394409, "learning_rate": 1.3122909847332349e-05, "loss": 1.2091, "step": 1611 }, { "epoch": 1.2200567644276255, "grad_norm": 2.41648006439209, "learning_rate": 1.3115282358056333e-05, "loss": 1.183, "step": 1612 }, { "epoch": 1.2208136234626301, "grad_norm": 2.1309831142425537, "learning_rate": 1.3107652861361408e-05, "loss": 1.1715, "step": 1613 }, { "epoch": 1.2215704824976348, "grad_norm": 2.268522262573242, "learning_rate": 1.3100021362165426e-05, "loss": 1.1762, "step": 1614 }, { "epoch": 1.2223273415326394, "grad_norm": 2.296552896499634, "learning_rate": 1.3092387865387533e-05, "loss": 1.104, "step": 1615 }, { "epoch": 1.2230842005676443, "grad_norm": 2.1397440433502197, "learning_rate": 1.3084752375948166e-05, "loss": 1.1284, "step": 1616 }, { "epoch": 1.223841059602649, "grad_norm": 2.097498893737793, "learning_rate": 1.3077114898769048e-05, "loss": 1.124, "step": 1617 }, { "epoch": 1.2245979186376537, "grad_norm": 2.212064027786255, "learning_rate": 1.3069475438773178e-05, "loss": 1.1184, "step": 1618 }, { "epoch": 1.2253547776726585, "grad_norm": 2.323784351348877, "learning_rate": 1.3061834000884831e-05, "loss": 1.1615, "step": 1619 }, { "epoch": 1.2261116367076632, "grad_norm": 2.1432077884674072, "learning_rate": 1.3054190590029572e-05, "loss": 1.1564, "step": 1620 }, { "epoch": 1.2268684957426679, "grad_norm": 2.3040294647216797, "learning_rate": 1.3046545211134218e-05, "loss": 1.1227, "step": 1621 }, { "epoch": 1.2276253547776728, "grad_norm": 2.406848669052124, "learning_rate": 1.3038897869126865e-05, "loss": 1.1577, "step": 1622 }, { "epoch": 1.2283822138126774, "grad_norm": 2.3050808906555176, "learning_rate": 1.3031248568936877e-05, "loss": 1.1924, "step": 1623 }, { "epoch": 1.229139072847682, "grad_norm": 2.03425669670105, "learning_rate": 1.3023597315494874e-05, "loss": 1.1474, "step": 1624 }, { "epoch": 1.2298959318826868, "grad_norm": 2.2921745777130127, "learning_rate": 1.3015944113732734e-05, "loss": 1.1815, "step": 1625 }, { "epoch": 1.2306527909176916, "grad_norm": 2.248823881149292, "learning_rate": 1.3008288968583603e-05, "loss": 1.1482, "step": 1626 }, { "epoch": 1.2314096499526963, "grad_norm": 2.3645107746124268, "learning_rate": 1.3000631884981858e-05, "loss": 1.1383, "step": 1627 }, { "epoch": 1.232166508987701, "grad_norm": 2.0877134799957275, "learning_rate": 1.2992972867863147e-05, "loss": 1.2064, "step": 1628 }, { "epoch": 1.2329233680227059, "grad_norm": 2.3611538410186768, "learning_rate": 1.2985311922164359e-05, "loss": 1.1582, "step": 1629 }, { "epoch": 1.2336802270577105, "grad_norm": 2.087958335876465, "learning_rate": 1.2977649052823616e-05, "loss": 1.1553, "step": 1630 }, { "epoch": 1.2344370860927152, "grad_norm": 2.2635905742645264, "learning_rate": 1.2969984264780283e-05, "loss": 1.1704, "step": 1631 }, { "epoch": 1.2351939451277199, "grad_norm": 2.251215934753418, "learning_rate": 1.2962317562974976e-05, "loss": 1.1855, "step": 1632 }, { "epoch": 1.2359508041627247, "grad_norm": 3.073653221130371, "learning_rate": 1.2954648952349527e-05, "loss": 1.1935, "step": 1633 }, { "epoch": 1.2367076631977294, "grad_norm": 2.2269108295440674, "learning_rate": 1.2946978437847004e-05, "loss": 1.146, "step": 1634 }, { "epoch": 1.237464522232734, "grad_norm": 2.4930577278137207, "learning_rate": 1.2939306024411713e-05, "loss": 1.1703, "step": 1635 }, { "epoch": 1.238221381267739, "grad_norm": 2.6076838970184326, "learning_rate": 1.2931631716989166e-05, "loss": 1.1756, "step": 1636 }, { "epoch": 1.2389782403027436, "grad_norm": 2.250298023223877, "learning_rate": 1.292395552052611e-05, "loss": 1.1607, "step": 1637 }, { "epoch": 1.2397350993377483, "grad_norm": 2.2543821334838867, "learning_rate": 1.29162774399705e-05, "loss": 1.1653, "step": 1638 }, { "epoch": 1.2404919583727532, "grad_norm": 2.1660284996032715, "learning_rate": 1.290859748027151e-05, "loss": 1.0827, "step": 1639 }, { "epoch": 1.2412488174077578, "grad_norm": 2.392023801803589, "learning_rate": 1.2900915646379524e-05, "loss": 1.1651, "step": 1640 }, { "epoch": 1.2420056764427625, "grad_norm": 2.147473096847534, "learning_rate": 1.2893231943246143e-05, "loss": 1.1012, "step": 1641 }, { "epoch": 1.2427625354777674, "grad_norm": 2.2261102199554443, "learning_rate": 1.2885546375824154e-05, "loss": 1.1313, "step": 1642 }, { "epoch": 1.243519394512772, "grad_norm": 2.1518611907958984, "learning_rate": 1.2877858949067564e-05, "loss": 1.1309, "step": 1643 }, { "epoch": 1.2442762535477767, "grad_norm": 2.2095835208892822, "learning_rate": 1.2870169667931567e-05, "loss": 1.109, "step": 1644 }, { "epoch": 1.2450331125827814, "grad_norm": 2.3556344509124756, "learning_rate": 1.2862478537372556e-05, "loss": 1.0937, "step": 1645 }, { "epoch": 1.2457899716177863, "grad_norm": 2.178274154663086, "learning_rate": 1.2854785562348121e-05, "loss": 1.1857, "step": 1646 }, { "epoch": 1.246546830652791, "grad_norm": 2.1965596675872803, "learning_rate": 1.2847090747817033e-05, "loss": 1.1292, "step": 1647 }, { "epoch": 1.2473036896877956, "grad_norm": 2.359292984008789, "learning_rate": 1.2839394098739257e-05, "loss": 1.1753, "step": 1648 }, { "epoch": 1.2480605487228003, "grad_norm": 2.189749002456665, "learning_rate": 1.2831695620075926e-05, "loss": 1.0852, "step": 1649 }, { "epoch": 1.2488174077578051, "grad_norm": 2.1658499240875244, "learning_rate": 1.2823995316789366e-05, "loss": 1.1278, "step": 1650 }, { "epoch": 1.2495742667928098, "grad_norm": 2.249729871749878, "learning_rate": 1.281629319384308e-05, "loss": 1.1829, "step": 1651 }, { "epoch": 1.2503311258278145, "grad_norm": 2.2419471740722656, "learning_rate": 1.2808589256201735e-05, "loss": 1.1373, "step": 1652 }, { "epoch": 1.2510879848628194, "grad_norm": 2.296644449234009, "learning_rate": 1.280088350883117e-05, "loss": 1.1724, "step": 1653 }, { "epoch": 1.251844843897824, "grad_norm": 2.77717661857605, "learning_rate": 1.2793175956698398e-05, "loss": 1.1367, "step": 1654 }, { "epoch": 1.2526017029328287, "grad_norm": 2.4535298347473145, "learning_rate": 1.2785466604771584e-05, "loss": 1.1874, "step": 1655 }, { "epoch": 1.2533585619678336, "grad_norm": 2.1663715839385986, "learning_rate": 1.2777755458020058e-05, "loss": 1.1819, "step": 1656 }, { "epoch": 1.2541154210028382, "grad_norm": 2.467954158782959, "learning_rate": 1.2770042521414314e-05, "loss": 1.1761, "step": 1657 }, { "epoch": 1.254872280037843, "grad_norm": 2.3556721210479736, "learning_rate": 1.2762327799925991e-05, "loss": 1.1574, "step": 1658 }, { "epoch": 1.2556291390728478, "grad_norm": 2.204136371612549, "learning_rate": 1.2754611298527875e-05, "loss": 1.0962, "step": 1659 }, { "epoch": 1.2563859981078525, "grad_norm": 2.3233225345611572, "learning_rate": 1.274689302219391e-05, "loss": 1.1369, "step": 1660 }, { "epoch": 1.2571428571428571, "grad_norm": 2.5201222896575928, "learning_rate": 1.2739172975899181e-05, "loss": 1.1593, "step": 1661 }, { "epoch": 1.257899716177862, "grad_norm": 2.531087875366211, "learning_rate": 1.273145116461991e-05, "loss": 1.1411, "step": 1662 }, { "epoch": 1.2586565752128667, "grad_norm": 2.510352373123169, "learning_rate": 1.2723727593333454e-05, "loss": 1.1426, "step": 1663 }, { "epoch": 1.2594134342478713, "grad_norm": 2.217392921447754, "learning_rate": 1.2716002267018314e-05, "loss": 1.0712, "step": 1664 }, { "epoch": 1.260170293282876, "grad_norm": 2.3988654613494873, "learning_rate": 1.2708275190654126e-05, "loss": 1.1884, "step": 1665 }, { "epoch": 1.2609271523178807, "grad_norm": 2.151139259338379, "learning_rate": 1.2700546369221628e-05, "loss": 1.095, "step": 1666 }, { "epoch": 1.2616840113528855, "grad_norm": 2.3287789821624756, "learning_rate": 1.2692815807702711e-05, "loss": 1.1726, "step": 1667 }, { "epoch": 1.2624408703878902, "grad_norm": 2.1874783039093018, "learning_rate": 1.268508351108038e-05, "loss": 1.1389, "step": 1668 }, { "epoch": 1.2631977294228949, "grad_norm": 2.501871347427368, "learning_rate": 1.2677349484338747e-05, "loss": 1.1717, "step": 1669 }, { "epoch": 1.2639545884578998, "grad_norm": 2.2890784740448, "learning_rate": 1.2669613732463053e-05, "loss": 1.1069, "step": 1670 }, { "epoch": 1.2647114474929044, "grad_norm": 2.41701340675354, "learning_rate": 1.2661876260439642e-05, "loss": 1.142, "step": 1671 }, { "epoch": 1.265468306527909, "grad_norm": 2.5459794998168945, "learning_rate": 1.2654137073255976e-05, "loss": 1.1071, "step": 1672 }, { "epoch": 1.266225165562914, "grad_norm": 2.1220319271087646, "learning_rate": 1.2646396175900612e-05, "loss": 1.1644, "step": 1673 }, { "epoch": 1.2669820245979186, "grad_norm": 2.383187770843506, "learning_rate": 1.2638653573363215e-05, "loss": 1.1515, "step": 1674 }, { "epoch": 1.2677388836329233, "grad_norm": 2.090808868408203, "learning_rate": 1.2630909270634554e-05, "loss": 1.1151, "step": 1675 }, { "epoch": 1.2684957426679282, "grad_norm": 2.306619882583618, "learning_rate": 1.2623163272706483e-05, "loss": 1.177, "step": 1676 }, { "epoch": 1.2692526017029329, "grad_norm": 2.4056408405303955, "learning_rate": 1.261541558457195e-05, "loss": 1.1811, "step": 1677 }, { "epoch": 1.2700094607379375, "grad_norm": 2.4404773712158203, "learning_rate": 1.2607666211225002e-05, "loss": 1.1686, "step": 1678 }, { "epoch": 1.2707663197729424, "grad_norm": 2.1623356342315674, "learning_rate": 1.2599915157660776e-05, "loss": 1.1472, "step": 1679 }, { "epoch": 1.271523178807947, "grad_norm": 2.5244863033294678, "learning_rate": 1.2592162428875465e-05, "loss": 1.1093, "step": 1680 }, { "epoch": 1.2722800378429517, "grad_norm": 2.649132251739502, "learning_rate": 1.2584408029866373e-05, "loss": 1.1644, "step": 1681 }, { "epoch": 1.2730368968779564, "grad_norm": 2.245384931564331, "learning_rate": 1.2576651965631862e-05, "loss": 1.2137, "step": 1682 }, { "epoch": 1.2737937559129613, "grad_norm": 3.0994908809661865, "learning_rate": 1.256889424117137e-05, "loss": 1.1189, "step": 1683 }, { "epoch": 1.274550614947966, "grad_norm": 2.228210210800171, "learning_rate": 1.2561134861485413e-05, "loss": 1.1694, "step": 1684 }, { "epoch": 1.2753074739829706, "grad_norm": 2.0974786281585693, "learning_rate": 1.2553373831575572e-05, "loss": 1.1661, "step": 1685 }, { "epoch": 1.2760643330179753, "grad_norm": 2.1458041667938232, "learning_rate": 1.2545611156444477e-05, "loss": 1.0814, "step": 1686 }, { "epoch": 1.2768211920529802, "grad_norm": 2.163115978240967, "learning_rate": 1.253784684109584e-05, "loss": 1.1643, "step": 1687 }, { "epoch": 1.2775780510879848, "grad_norm": 2.122997999191284, "learning_rate": 1.2530080890534416e-05, "loss": 1.172, "step": 1688 }, { "epoch": 1.2783349101229895, "grad_norm": 2.4748451709747314, "learning_rate": 1.2522313309766021e-05, "loss": 1.1489, "step": 1689 }, { "epoch": 1.2790917691579944, "grad_norm": 2.201387882232666, "learning_rate": 1.2514544103797517e-05, "loss": 1.1509, "step": 1690 }, { "epoch": 1.279848628192999, "grad_norm": 2.158069610595703, "learning_rate": 1.2506773277636812e-05, "loss": 1.1284, "step": 1691 }, { "epoch": 1.2806054872280037, "grad_norm": 2.192920684814453, "learning_rate": 1.2499000836292875e-05, "loss": 1.156, "step": 1692 }, { "epoch": 1.2813623462630086, "grad_norm": 2.266641855239868, "learning_rate": 1.2491226784775685e-05, "loss": 1.1298, "step": 1693 }, { "epoch": 1.2821192052980133, "grad_norm": 2.677654981613159, "learning_rate": 1.2483451128096289e-05, "loss": 1.1472, "step": 1694 }, { "epoch": 1.282876064333018, "grad_norm": 2.4137139320373535, "learning_rate": 1.2475673871266756e-05, "loss": 1.075, "step": 1695 }, { "epoch": 1.2836329233680228, "grad_norm": 2.201813220977783, "learning_rate": 1.2467895019300187e-05, "loss": 1.1224, "step": 1696 }, { "epoch": 1.2843897824030275, "grad_norm": 2.1659185886383057, "learning_rate": 1.2460114577210703e-05, "loss": 1.1606, "step": 1697 }, { "epoch": 1.2851466414380321, "grad_norm": 2.3215322494506836, "learning_rate": 1.245233255001347e-05, "loss": 1.1408, "step": 1698 }, { "epoch": 1.285903500473037, "grad_norm": 2.530764579772949, "learning_rate": 1.2444548942724657e-05, "loss": 1.1433, "step": 1699 }, { "epoch": 1.2866603595080417, "grad_norm": 2.5110771656036377, "learning_rate": 1.2436763760361461e-05, "loss": 1.1644, "step": 1700 }, { "epoch": 1.2874172185430464, "grad_norm": 2.0336203575134277, "learning_rate": 1.2428977007942092e-05, "loss": 1.1538, "step": 1701 }, { "epoch": 1.288174077578051, "grad_norm": 2.3316948413848877, "learning_rate": 1.2421188690485767e-05, "loss": 1.1349, "step": 1702 }, { "epoch": 1.2889309366130557, "grad_norm": 2.151745319366455, "learning_rate": 1.241339881301273e-05, "loss": 1.1464, "step": 1703 }, { "epoch": 1.2896877956480606, "grad_norm": 2.1439285278320312, "learning_rate": 1.2405607380544198e-05, "loss": 1.1384, "step": 1704 }, { "epoch": 1.2904446546830652, "grad_norm": 2.0676236152648926, "learning_rate": 1.239781439810242e-05, "loss": 1.1315, "step": 1705 }, { "epoch": 1.29120151371807, "grad_norm": 2.1534860134124756, "learning_rate": 1.239001987071064e-05, "loss": 1.1232, "step": 1706 }, { "epoch": 1.2919583727530748, "grad_norm": 2.4337046146392822, "learning_rate": 1.238222380339308e-05, "loss": 1.1637, "step": 1707 }, { "epoch": 1.2927152317880795, "grad_norm": 3.3103768825531006, "learning_rate": 1.2374426201174974e-05, "loss": 1.1255, "step": 1708 }, { "epoch": 1.2934720908230841, "grad_norm": 2.3964853286743164, "learning_rate": 1.2366627069082533e-05, "loss": 1.1474, "step": 1709 }, { "epoch": 1.294228949858089, "grad_norm": 2.196171760559082, "learning_rate": 1.235882641214296e-05, "loss": 1.1152, "step": 1710 }, { "epoch": 1.2949858088930937, "grad_norm": 2.2231311798095703, "learning_rate": 1.2351024235384435e-05, "loss": 1.0872, "step": 1711 }, { "epoch": 1.2957426679280983, "grad_norm": 3.2890310287475586, "learning_rate": 1.2343220543836132e-05, "loss": 1.1376, "step": 1712 }, { "epoch": 1.2964995269631032, "grad_norm": 2.37038516998291, "learning_rate": 1.2335415342528186e-05, "loss": 1.1309, "step": 1713 }, { "epoch": 1.2972563859981079, "grad_norm": 2.0955164432525635, "learning_rate": 1.2327608636491706e-05, "loss": 1.1721, "step": 1714 }, { "epoch": 1.2980132450331126, "grad_norm": 2.3492562770843506, "learning_rate": 1.2319800430758787e-05, "loss": 1.1855, "step": 1715 }, { "epoch": 1.2987701040681174, "grad_norm": 2.366028308868408, "learning_rate": 1.231199073036247e-05, "loss": 1.1547, "step": 1716 }, { "epoch": 1.299526963103122, "grad_norm": 2.163280725479126, "learning_rate": 1.230417954033677e-05, "loss": 1.1289, "step": 1717 }, { "epoch": 1.3002838221381268, "grad_norm": 2.1231632232666016, "learning_rate": 1.2296366865716663e-05, "loss": 1.1386, "step": 1718 }, { "epoch": 1.3010406811731314, "grad_norm": 2.1293210983276367, "learning_rate": 1.2288552711538076e-05, "loss": 1.185, "step": 1719 }, { "epoch": 1.3017975402081363, "grad_norm": 2.0795953273773193, "learning_rate": 1.2280737082837903e-05, "loss": 1.0935, "step": 1720 }, { "epoch": 1.302554399243141, "grad_norm": 2.1011762619018555, "learning_rate": 1.2272919984653972e-05, "loss": 1.1672, "step": 1721 }, { "epoch": 1.3033112582781456, "grad_norm": 2.221156120300293, "learning_rate": 1.2265101422025064e-05, "loss": 1.1073, "step": 1722 }, { "epoch": 1.3040681173131503, "grad_norm": 2.2249984741210938, "learning_rate": 1.2257281399990913e-05, "loss": 1.1659, "step": 1723 }, { "epoch": 1.3048249763481552, "grad_norm": 4.573660850524902, "learning_rate": 1.2249459923592176e-05, "loss": 1.1835, "step": 1724 }, { "epoch": 1.3055818353831599, "grad_norm": 2.1640846729278564, "learning_rate": 1.2241636997870459e-05, "loss": 1.1342, "step": 1725 }, { "epoch": 1.3063386944181645, "grad_norm": 2.010333299636841, "learning_rate": 1.223381262786831e-05, "loss": 1.1, "step": 1726 }, { "epoch": 1.3070955534531694, "grad_norm": 2.026108980178833, "learning_rate": 1.2225986818629188e-05, "loss": 1.1424, "step": 1727 }, { "epoch": 1.307852412488174, "grad_norm": 2.0564801692962646, "learning_rate": 1.22181595751975e-05, "loss": 1.1336, "step": 1728 }, { "epoch": 1.3086092715231787, "grad_norm": 1.9752734899520874, "learning_rate": 1.2210330902618555e-05, "loss": 1.1617, "step": 1729 }, { "epoch": 1.3093661305581836, "grad_norm": 2.1132519245147705, "learning_rate": 1.2202500805938606e-05, "loss": 1.1841, "step": 1730 }, { "epoch": 1.3101229895931883, "grad_norm": 2.167475461959839, "learning_rate": 1.2194669290204813e-05, "loss": 1.1255, "step": 1731 }, { "epoch": 1.310879848628193, "grad_norm": 2.1125247478485107, "learning_rate": 1.2186836360465249e-05, "loss": 1.1434, "step": 1732 }, { "epoch": 1.3116367076631978, "grad_norm": 2.126776933670044, "learning_rate": 1.21790020217689e-05, "loss": 1.1626, "step": 1733 }, { "epoch": 1.3123935666982025, "grad_norm": 2.1454262733459473, "learning_rate": 1.2171166279165668e-05, "loss": 1.116, "step": 1734 }, { "epoch": 1.3131504257332072, "grad_norm": 2.166027545928955, "learning_rate": 1.216332913770634e-05, "loss": 1.145, "step": 1735 }, { "epoch": 1.313907284768212, "grad_norm": 2.0442612171173096, "learning_rate": 1.2155490602442628e-05, "loss": 1.1443, "step": 1736 }, { "epoch": 1.3146641438032167, "grad_norm": 2.2408742904663086, "learning_rate": 1.2147650678427136e-05, "loss": 1.1297, "step": 1737 }, { "epoch": 1.3154210028382214, "grad_norm": 2.039287805557251, "learning_rate": 1.213980937071335e-05, "loss": 1.1183, "step": 1738 }, { "epoch": 1.316177861873226, "grad_norm": 2.4958298206329346, "learning_rate": 1.213196668435566e-05, "loss": 1.1127, "step": 1739 }, { "epoch": 1.3169347209082307, "grad_norm": 2.0684995651245117, "learning_rate": 1.212412262440935e-05, "loss": 1.1092, "step": 1740 }, { "epoch": 1.3176915799432356, "grad_norm": 2.2518489360809326, "learning_rate": 1.2116277195930566e-05, "loss": 1.1256, "step": 1741 }, { "epoch": 1.3184484389782403, "grad_norm": 2.2096285820007324, "learning_rate": 1.2108430403976363e-05, "loss": 1.1785, "step": 1742 }, { "epoch": 1.319205298013245, "grad_norm": 2.1161551475524902, "learning_rate": 1.2100582253604663e-05, "loss": 1.1816, "step": 1743 }, { "epoch": 1.3199621570482498, "grad_norm": 2.2261106967926025, "learning_rate": 1.2092732749874258e-05, "loss": 1.1512, "step": 1744 }, { "epoch": 1.3207190160832545, "grad_norm": 2.4035398960113525, "learning_rate": 1.2084881897844827e-05, "loss": 1.1505, "step": 1745 }, { "epoch": 1.3214758751182591, "grad_norm": 2.2090861797332764, "learning_rate": 1.2077029702576898e-05, "loss": 1.1521, "step": 1746 }, { "epoch": 1.322232734153264, "grad_norm": 2.054429769515991, "learning_rate": 1.2069176169131889e-05, "loss": 1.1863, "step": 1747 }, { "epoch": 1.3229895931882687, "grad_norm": 2.0456814765930176, "learning_rate": 1.2061321302572063e-05, "loss": 1.1125, "step": 1748 }, { "epoch": 1.3237464522232734, "grad_norm": 2.066861629486084, "learning_rate": 1.2053465107960536e-05, "loss": 1.1107, "step": 1749 }, { "epoch": 1.3245033112582782, "grad_norm": 2.2116339206695557, "learning_rate": 1.204560759036131e-05, "loss": 1.1796, "step": 1750 }, { "epoch": 1.325260170293283, "grad_norm": 2.355694532394409, "learning_rate": 1.203774875483921e-05, "loss": 1.1221, "step": 1751 }, { "epoch": 1.3260170293282876, "grad_norm": 2.3318047523498535, "learning_rate": 1.202988860645992e-05, "loss": 1.1482, "step": 1752 }, { "epoch": 1.3267738883632925, "grad_norm": 2.3973910808563232, "learning_rate": 1.202202715028998e-05, "loss": 1.09, "step": 1753 }, { "epoch": 1.3275307473982971, "grad_norm": 2.3162357807159424, "learning_rate": 1.2014164391396761e-05, "loss": 1.1362, "step": 1754 }, { "epoch": 1.3282876064333018, "grad_norm": 2.232311964035034, "learning_rate": 1.2006300334848472e-05, "loss": 1.1419, "step": 1755 }, { "epoch": 1.3290444654683065, "grad_norm": 2.3239498138427734, "learning_rate": 1.1998434985714172e-05, "loss": 1.151, "step": 1756 }, { "epoch": 1.3298013245033111, "grad_norm": 2.5459787845611572, "learning_rate": 1.1990568349063742e-05, "loss": 1.1649, "step": 1757 }, { "epoch": 1.330558183538316, "grad_norm": 2.184105396270752, "learning_rate": 1.1982700429967893e-05, "loss": 1.1334, "step": 1758 }, { "epoch": 1.3313150425733207, "grad_norm": 2.092010498046875, "learning_rate": 1.1974831233498175e-05, "loss": 1.0945, "step": 1759 }, { "epoch": 1.3320719016083253, "grad_norm": 2.204160690307617, "learning_rate": 1.1966960764726937e-05, "loss": 1.1411, "step": 1760 }, { "epoch": 1.3328287606433302, "grad_norm": 2.467329978942871, "learning_rate": 1.195908902872738e-05, "loss": 1.1259, "step": 1761 }, { "epoch": 1.3335856196783349, "grad_norm": 2.2322754859924316, "learning_rate": 1.1951216030573489e-05, "loss": 1.1204, "step": 1762 }, { "epoch": 1.3343424787133396, "grad_norm": 2.1422557830810547, "learning_rate": 1.1943341775340087e-05, "loss": 1.1306, "step": 1763 }, { "epoch": 1.3350993377483444, "grad_norm": 2.393411159515381, "learning_rate": 1.1935466268102802e-05, "loss": 1.1409, "step": 1764 }, { "epoch": 1.335856196783349, "grad_norm": 2.2010276317596436, "learning_rate": 1.192758951393806e-05, "loss": 1.0952, "step": 1765 }, { "epoch": 1.3366130558183538, "grad_norm": 2.128002166748047, "learning_rate": 1.1919711517923095e-05, "loss": 1.1084, "step": 1766 }, { "epoch": 1.3373699148533587, "grad_norm": 2.090876340866089, "learning_rate": 1.1911832285135953e-05, "loss": 1.1409, "step": 1767 }, { "epoch": 1.3381267738883633, "grad_norm": 2.232081890106201, "learning_rate": 1.1903951820655458e-05, "loss": 1.176, "step": 1768 }, { "epoch": 1.338883632923368, "grad_norm": 2.2187860012054443, "learning_rate": 1.1896070129561237e-05, "loss": 1.1094, "step": 1769 }, { "epoch": 1.3396404919583729, "grad_norm": 2.2788565158843994, "learning_rate": 1.1888187216933715e-05, "loss": 1.1302, "step": 1770 }, { "epoch": 1.3403973509933775, "grad_norm": 2.153656482696533, "learning_rate": 1.1880303087854093e-05, "loss": 1.1742, "step": 1771 }, { "epoch": 1.3411542100283822, "grad_norm": 2.2120232582092285, "learning_rate": 1.187241774740436e-05, "loss": 1.1553, "step": 1772 }, { "epoch": 1.3419110690633869, "grad_norm": 2.09063720703125, "learning_rate": 1.1864531200667283e-05, "loss": 1.1231, "step": 1773 }, { "epoch": 1.3426679280983917, "grad_norm": 2.186126232147217, "learning_rate": 1.1856643452726417e-05, "loss": 1.1121, "step": 1774 }, { "epoch": 1.3434247871333964, "grad_norm": 2.706040620803833, "learning_rate": 1.1848754508666084e-05, "loss": 1.1323, "step": 1775 }, { "epoch": 1.344181646168401, "grad_norm": 2.1138103008270264, "learning_rate": 1.1840864373571368e-05, "loss": 1.1255, "step": 1776 }, { "epoch": 1.3449385052034057, "grad_norm": 2.1181037425994873, "learning_rate": 1.1832973052528136e-05, "loss": 1.1406, "step": 1777 }, { "epoch": 1.3456953642384106, "grad_norm": 2.0773799419403076, "learning_rate": 1.1825080550623014e-05, "loss": 1.1159, "step": 1778 }, { "epoch": 1.3464522232734153, "grad_norm": 2.3848013877868652, "learning_rate": 1.1817186872943385e-05, "loss": 1.1687, "step": 1779 }, { "epoch": 1.34720908230842, "grad_norm": 2.052957534790039, "learning_rate": 1.1809292024577397e-05, "loss": 1.176, "step": 1780 }, { "epoch": 1.3479659413434248, "grad_norm": 2.1066739559173584, "learning_rate": 1.1801396010613947e-05, "loss": 1.1563, "step": 1781 }, { "epoch": 1.3487228003784295, "grad_norm": 2.2263689041137695, "learning_rate": 1.1793498836142685e-05, "loss": 1.1763, "step": 1782 }, { "epoch": 1.3494796594134342, "grad_norm": 2.263129711151123, "learning_rate": 1.178560050625401e-05, "loss": 1.1737, "step": 1783 }, { "epoch": 1.350236518448439, "grad_norm": 2.3742623329162598, "learning_rate": 1.1777701026039063e-05, "loss": 1.123, "step": 1784 }, { "epoch": 1.3509933774834437, "grad_norm": 2.1886773109436035, "learning_rate": 1.1769800400589733e-05, "loss": 1.1635, "step": 1785 }, { "epoch": 1.3517502365184484, "grad_norm": 2.190129041671753, "learning_rate": 1.1761898634998635e-05, "loss": 1.1131, "step": 1786 }, { "epoch": 1.3525070955534533, "grad_norm": 2.258070468902588, "learning_rate": 1.1753995734359131e-05, "loss": 1.1794, "step": 1787 }, { "epoch": 1.353263954588458, "grad_norm": 2.0639896392822266, "learning_rate": 1.1746091703765316e-05, "loss": 1.1047, "step": 1788 }, { "epoch": 1.3540208136234626, "grad_norm": 2.4623801708221436, "learning_rate": 1.1738186548311998e-05, "loss": 1.1642, "step": 1789 }, { "epoch": 1.3547776726584675, "grad_norm": 2.082294225692749, "learning_rate": 1.1730280273094724e-05, "loss": 1.1374, "step": 1790 }, { "epoch": 1.3555345316934722, "grad_norm": 1.9872076511383057, "learning_rate": 1.1722372883209758e-05, "loss": 1.1305, "step": 1791 }, { "epoch": 1.3562913907284768, "grad_norm": 2.2372727394104004, "learning_rate": 1.1714464383754085e-05, "loss": 1.1261, "step": 1792 }, { "epoch": 1.3570482497634815, "grad_norm": 2.16003680229187, "learning_rate": 1.1706554779825399e-05, "loss": 1.1289, "step": 1793 }, { "epoch": 1.3578051087984861, "grad_norm": 2.2329182624816895, "learning_rate": 1.1698644076522112e-05, "loss": 1.1331, "step": 1794 }, { "epoch": 1.358561967833491, "grad_norm": 2.2425284385681152, "learning_rate": 1.1690732278943344e-05, "loss": 1.2247, "step": 1795 }, { "epoch": 1.3593188268684957, "grad_norm": 2.589672803878784, "learning_rate": 1.1682819392188917e-05, "loss": 1.144, "step": 1796 }, { "epoch": 1.3600756859035004, "grad_norm": 2.2635231018066406, "learning_rate": 1.1674905421359358e-05, "loss": 1.1585, "step": 1797 }, { "epoch": 1.3608325449385053, "grad_norm": 2.291184425354004, "learning_rate": 1.1666990371555893e-05, "loss": 1.1063, "step": 1798 }, { "epoch": 1.36158940397351, "grad_norm": 2.289581298828125, "learning_rate": 1.1659074247880442e-05, "loss": 1.183, "step": 1799 }, { "epoch": 1.3623462630085146, "grad_norm": 2.1125857830047607, "learning_rate": 1.1651157055435616e-05, "loss": 1.1226, "step": 1800 }, { "epoch": 1.3631031220435195, "grad_norm": 2.1084022521972656, "learning_rate": 1.1643238799324714e-05, "loss": 1.1741, "step": 1801 }, { "epoch": 1.3638599810785241, "grad_norm": 2.2463252544403076, "learning_rate": 1.1635319484651733e-05, "loss": 1.1459, "step": 1802 }, { "epoch": 1.3646168401135288, "grad_norm": 2.2021613121032715, "learning_rate": 1.1627399116521334e-05, "loss": 1.1939, "step": 1803 }, { "epoch": 1.3653736991485337, "grad_norm": 2.190654754638672, "learning_rate": 1.1619477700038863e-05, "loss": 1.0967, "step": 1804 }, { "epoch": 1.3661305581835383, "grad_norm": 1.9912575483322144, "learning_rate": 1.1611555240310356e-05, "loss": 1.1268, "step": 1805 }, { "epoch": 1.366887417218543, "grad_norm": 2.1702189445495605, "learning_rate": 1.16036317424425e-05, "loss": 1.1957, "step": 1806 }, { "epoch": 1.367644276253548, "grad_norm": 2.0921695232391357, "learning_rate": 1.1595707211542662e-05, "loss": 1.1161, "step": 1807 }, { "epoch": 1.3684011352885526, "grad_norm": 2.1319305896759033, "learning_rate": 1.1587781652718877e-05, "loss": 1.1411, "step": 1808 }, { "epoch": 1.3691579943235572, "grad_norm": 2.2225658893585205, "learning_rate": 1.1579855071079838e-05, "loss": 1.1259, "step": 1809 }, { "epoch": 1.369914853358562, "grad_norm": 1.943051815032959, "learning_rate": 1.1571927471734894e-05, "loss": 1.1088, "step": 1810 }, { "epoch": 1.3706717123935668, "grad_norm": 2.3888943195343018, "learning_rate": 1.156399885979406e-05, "loss": 1.1416, "step": 1811 }, { "epoch": 1.3714285714285714, "grad_norm": 2.145301580429077, "learning_rate": 1.1556069240368002e-05, "loss": 1.1412, "step": 1812 }, { "epoch": 1.372185430463576, "grad_norm": 2.0973587036132812, "learning_rate": 1.1548138618568023e-05, "loss": 1.1358, "step": 1813 }, { "epoch": 1.3729422894985808, "grad_norm": 2.2998507022857666, "learning_rate": 1.1540206999506086e-05, "loss": 1.152, "step": 1814 }, { "epoch": 1.3736991485335857, "grad_norm": 2.1464428901672363, "learning_rate": 1.1532274388294789e-05, "loss": 1.105, "step": 1815 }, { "epoch": 1.3744560075685903, "grad_norm": 2.0508806705474854, "learning_rate": 1.152434079004738e-05, "loss": 1.1425, "step": 1816 }, { "epoch": 1.375212866603595, "grad_norm": 2.05656099319458, "learning_rate": 1.151640620987772e-05, "loss": 1.1042, "step": 1817 }, { "epoch": 1.3759697256385999, "grad_norm": 2.153604030609131, "learning_rate": 1.1508470652900332e-05, "loss": 1.1361, "step": 1818 }, { "epoch": 1.3767265846736045, "grad_norm": 2.9740560054779053, "learning_rate": 1.1500534124230354e-05, "loss": 1.1646, "step": 1819 }, { "epoch": 1.3774834437086092, "grad_norm": 2.213672637939453, "learning_rate": 1.149259662898354e-05, "loss": 1.1348, "step": 1820 }, { "epoch": 1.378240302743614, "grad_norm": 2.2703373432159424, "learning_rate": 1.148465817227629e-05, "loss": 1.1456, "step": 1821 }, { "epoch": 1.3789971617786188, "grad_norm": 2.1815407276153564, "learning_rate": 1.1476718759225611e-05, "loss": 1.16, "step": 1822 }, { "epoch": 1.3797540208136234, "grad_norm": 2.198272943496704, "learning_rate": 1.1468778394949123e-05, "loss": 1.1677, "step": 1823 }, { "epoch": 1.3805108798486283, "grad_norm": 2.1629281044006348, "learning_rate": 1.1460837084565064e-05, "loss": 1.1269, "step": 1824 }, { "epoch": 1.381267738883633, "grad_norm": 1.9979993104934692, "learning_rate": 1.1452894833192287e-05, "loss": 1.1243, "step": 1825 }, { "epoch": 1.3820245979186376, "grad_norm": 2.1406540870666504, "learning_rate": 1.144495164595024e-05, "loss": 1.1819, "step": 1826 }, { "epoch": 1.3827814569536425, "grad_norm": 2.2074644565582275, "learning_rate": 1.1437007527958985e-05, "loss": 1.1368, "step": 1827 }, { "epoch": 1.3835383159886472, "grad_norm": 2.279019355773926, "learning_rate": 1.1429062484339175e-05, "loss": 1.1293, "step": 1828 }, { "epoch": 1.3842951750236518, "grad_norm": 2.179516315460205, "learning_rate": 1.1421116520212066e-05, "loss": 1.1538, "step": 1829 }, { "epoch": 1.3850520340586565, "grad_norm": 2.0977933406829834, "learning_rate": 1.1413169640699505e-05, "loss": 1.1259, "step": 1830 }, { "epoch": 1.3858088930936612, "grad_norm": 2.1527068614959717, "learning_rate": 1.1405221850923932e-05, "loss": 1.0934, "step": 1831 }, { "epoch": 1.386565752128666, "grad_norm": 2.2525691986083984, "learning_rate": 1.1397273156008364e-05, "loss": 1.2084, "step": 1832 }, { "epoch": 1.3873226111636707, "grad_norm": 2.0335781574249268, "learning_rate": 1.1389323561076419e-05, "loss": 1.1224, "step": 1833 }, { "epoch": 1.3880794701986754, "grad_norm": 2.142940044403076, "learning_rate": 1.1381373071252273e-05, "loss": 1.0934, "step": 1834 }, { "epoch": 1.3888363292336803, "grad_norm": 2.2513163089752197, "learning_rate": 1.1373421691660697e-05, "loss": 1.161, "step": 1835 }, { "epoch": 1.389593188268685, "grad_norm": 2.1784231662750244, "learning_rate": 1.1365469427427037e-05, "loss": 1.127, "step": 1836 }, { "epoch": 1.3903500473036896, "grad_norm": 2.2235348224639893, "learning_rate": 1.1357516283677185e-05, "loss": 1.1595, "step": 1837 }, { "epoch": 1.3911069063386945, "grad_norm": 2.08614182472229, "learning_rate": 1.1349562265537626e-05, "loss": 1.1083, "step": 1838 }, { "epoch": 1.3918637653736992, "grad_norm": 1.9127520322799683, "learning_rate": 1.1341607378135395e-05, "loss": 1.1516, "step": 1839 }, { "epoch": 1.3926206244087038, "grad_norm": 2.100748300552368, "learning_rate": 1.1333651626598095e-05, "loss": 1.1306, "step": 1840 }, { "epoch": 1.3933774834437087, "grad_norm": 2.3174188137054443, "learning_rate": 1.1325695016053878e-05, "loss": 1.1893, "step": 1841 }, { "epoch": 1.3941343424787134, "grad_norm": 2.4146411418914795, "learning_rate": 1.1317737551631455e-05, "loss": 1.1463, "step": 1842 }, { "epoch": 1.394891201513718, "grad_norm": 2.604128837585449, "learning_rate": 1.130977923846009e-05, "loss": 1.131, "step": 1843 }, { "epoch": 1.395648060548723, "grad_norm": 2.1692941188812256, "learning_rate": 1.1301820081669586e-05, "loss": 1.1504, "step": 1844 }, { "epoch": 1.3964049195837276, "grad_norm": 1.9960031509399414, "learning_rate": 1.1293860086390294e-05, "loss": 1.133, "step": 1845 }, { "epoch": 1.3971617786187323, "grad_norm": 2.2130203247070312, "learning_rate": 1.1285899257753105e-05, "loss": 1.1375, "step": 1846 }, { "epoch": 1.397918637653737, "grad_norm": 2.1830358505249023, "learning_rate": 1.1277937600889458e-05, "loss": 1.1391, "step": 1847 }, { "epoch": 1.3986754966887418, "grad_norm": 2.188948392868042, "learning_rate": 1.1269975120931301e-05, "loss": 1.1241, "step": 1848 }, { "epoch": 1.3994323557237465, "grad_norm": 2.3037242889404297, "learning_rate": 1.1262011823011132e-05, "loss": 1.1221, "step": 1849 }, { "epoch": 1.4001892147587511, "grad_norm": 2.0598981380462646, "learning_rate": 1.1254047712261975e-05, "loss": 1.108, "step": 1850 }, { "epoch": 1.4009460737937558, "grad_norm": 2.116628885269165, "learning_rate": 1.1246082793817372e-05, "loss": 1.1166, "step": 1851 }, { "epoch": 1.4017029328287607, "grad_norm": 2.09624981880188, "learning_rate": 1.1238117072811389e-05, "loss": 1.1216, "step": 1852 }, { "epoch": 1.4024597918637653, "grad_norm": 2.092494487762451, "learning_rate": 1.1230150554378606e-05, "loss": 1.1447, "step": 1853 }, { "epoch": 1.40321665089877, "grad_norm": 1.9815505743026733, "learning_rate": 1.1222183243654119e-05, "loss": 1.1939, "step": 1854 }, { "epoch": 1.403973509933775, "grad_norm": 2.285538673400879, "learning_rate": 1.121421514577354e-05, "loss": 1.1981, "step": 1855 }, { "epoch": 1.4047303689687796, "grad_norm": 2.4579432010650635, "learning_rate": 1.1206246265872975e-05, "loss": 1.1246, "step": 1856 }, { "epoch": 1.4054872280037842, "grad_norm": 2.2195796966552734, "learning_rate": 1.1198276609089051e-05, "loss": 1.0943, "step": 1857 }, { "epoch": 1.4062440870387891, "grad_norm": 2.3332061767578125, "learning_rate": 1.1190306180558886e-05, "loss": 1.1896, "step": 1858 }, { "epoch": 1.4070009460737938, "grad_norm": 2.257955551147461, "learning_rate": 1.1182334985420088e-05, "loss": 1.1565, "step": 1859 }, { "epoch": 1.4077578051087984, "grad_norm": 2.1527364253997803, "learning_rate": 1.1174363028810782e-05, "loss": 1.1269, "step": 1860 }, { "epoch": 1.4085146641438033, "grad_norm": 2.168989896774292, "learning_rate": 1.1166390315869555e-05, "loss": 1.118, "step": 1861 }, { "epoch": 1.409271523178808, "grad_norm": 2.1610758304595947, "learning_rate": 1.1158416851735505e-05, "loss": 1.1126, "step": 1862 }, { "epoch": 1.4100283822138127, "grad_norm": 2.423572063446045, "learning_rate": 1.1150442641548205e-05, "loss": 1.1681, "step": 1863 }, { "epoch": 1.4107852412488175, "grad_norm": 2.1142797470092773, "learning_rate": 1.1142467690447708e-05, "loss": 1.1159, "step": 1864 }, { "epoch": 1.4115421002838222, "grad_norm": 2.206160068511963, "learning_rate": 1.1134492003574541e-05, "loss": 1.1007, "step": 1865 }, { "epoch": 1.4122989593188269, "grad_norm": 2.223226547241211, "learning_rate": 1.1126515586069716e-05, "loss": 1.0648, "step": 1866 }, { "epoch": 1.4130558183538315, "grad_norm": 2.488703966140747, "learning_rate": 1.1118538443074713e-05, "loss": 1.107, "step": 1867 }, { "epoch": 1.4138126773888362, "grad_norm": 2.2958545684814453, "learning_rate": 1.1110560579731469e-05, "loss": 1.1553, "step": 1868 }, { "epoch": 1.414569536423841, "grad_norm": 2.343440055847168, "learning_rate": 1.1102582001182399e-05, "loss": 1.1225, "step": 1869 }, { "epoch": 1.4153263954588458, "grad_norm": 2.963460922241211, "learning_rate": 1.1094602712570366e-05, "loss": 1.1211, "step": 1870 }, { "epoch": 1.4160832544938504, "grad_norm": 2.123777151107788, "learning_rate": 1.1086622719038708e-05, "loss": 1.0919, "step": 1871 }, { "epoch": 1.4168401135288553, "grad_norm": 2.1496341228485107, "learning_rate": 1.1078642025731197e-05, "loss": 1.0807, "step": 1872 }, { "epoch": 1.41759697256386, "grad_norm": 2.147340774536133, "learning_rate": 1.107066063779207e-05, "loss": 1.1372, "step": 1873 }, { "epoch": 1.4183538315988646, "grad_norm": 2.05765438079834, "learning_rate": 1.1062678560366013e-05, "loss": 1.1531, "step": 1874 }, { "epoch": 1.4191106906338695, "grad_norm": 2.409080982208252, "learning_rate": 1.1054695798598142e-05, "loss": 1.1531, "step": 1875 }, { "epoch": 1.4198675496688742, "grad_norm": 1.9932847023010254, "learning_rate": 1.104671235763403e-05, "loss": 1.1766, "step": 1876 }, { "epoch": 1.4206244087038788, "grad_norm": 2.2019896507263184, "learning_rate": 1.1038728242619686e-05, "loss": 1.1037, "step": 1877 }, { "epoch": 1.4213812677388837, "grad_norm": 2.263040065765381, "learning_rate": 1.1030743458701533e-05, "loss": 1.1475, "step": 1878 }, { "epoch": 1.4221381267738884, "grad_norm": 2.0611464977264404, "learning_rate": 1.1022758011026455e-05, "loss": 1.1652, "step": 1879 }, { "epoch": 1.422894985808893, "grad_norm": 2.175058364868164, "learning_rate": 1.1014771904741746e-05, "loss": 1.1164, "step": 1880 }, { "epoch": 1.423651844843898, "grad_norm": 2.152921676635742, "learning_rate": 1.1006785144995127e-05, "loss": 1.1327, "step": 1881 }, { "epoch": 1.4244087038789026, "grad_norm": 2.4787025451660156, "learning_rate": 1.0998797736934743e-05, "loss": 1.1633, "step": 1882 }, { "epoch": 1.4251655629139073, "grad_norm": 2.7934088706970215, "learning_rate": 1.0990809685709149e-05, "loss": 1.1831, "step": 1883 }, { "epoch": 1.425922421948912, "grad_norm": 2.058727502822876, "learning_rate": 1.0982820996467334e-05, "loss": 1.1262, "step": 1884 }, { "epoch": 1.4266792809839166, "grad_norm": 2.1673519611358643, "learning_rate": 1.0974831674358674e-05, "loss": 1.1478, "step": 1885 }, { "epoch": 1.4274361400189215, "grad_norm": 2.481576442718506, "learning_rate": 1.0966841724532966e-05, "loss": 1.1166, "step": 1886 }, { "epoch": 1.4281929990539262, "grad_norm": 2.131117820739746, "learning_rate": 1.0958851152140413e-05, "loss": 1.084, "step": 1887 }, { "epoch": 1.4289498580889308, "grad_norm": 2.3017077445983887, "learning_rate": 1.095085996233162e-05, "loss": 1.1406, "step": 1888 }, { "epoch": 1.4297067171239357, "grad_norm": 2.4619855880737305, "learning_rate": 1.0942868160257574e-05, "loss": 1.1287, "step": 1889 }, { "epoch": 1.4304635761589404, "grad_norm": 2.3940885066986084, "learning_rate": 1.0934875751069679e-05, "loss": 1.1135, "step": 1890 }, { "epoch": 1.431220435193945, "grad_norm": 2.3564260005950928, "learning_rate": 1.0926882739919718e-05, "loss": 1.1474, "step": 1891 }, { "epoch": 1.43197729422895, "grad_norm": 2.1630711555480957, "learning_rate": 1.091888913195986e-05, "loss": 1.1622, "step": 1892 }, { "epoch": 1.4327341532639546, "grad_norm": 2.173370838165283, "learning_rate": 1.0910894932342666e-05, "loss": 1.1384, "step": 1893 }, { "epoch": 1.4334910122989593, "grad_norm": 2.09320068359375, "learning_rate": 1.0902900146221075e-05, "loss": 1.1625, "step": 1894 }, { "epoch": 1.4342478713339641, "grad_norm": 2.508751630783081, "learning_rate": 1.0894904778748406e-05, "loss": 1.1457, "step": 1895 }, { "epoch": 1.4350047303689688, "grad_norm": 2.234450578689575, "learning_rate": 1.0886908835078349e-05, "loss": 1.1785, "step": 1896 }, { "epoch": 1.4357615894039735, "grad_norm": 2.431640148162842, "learning_rate": 1.0878912320364962e-05, "loss": 1.1465, "step": 1897 }, { "epoch": 1.4365184484389784, "grad_norm": 2.068406581878662, "learning_rate": 1.087091523976269e-05, "loss": 1.1386, "step": 1898 }, { "epoch": 1.437275307473983, "grad_norm": 2.1216137409210205, "learning_rate": 1.0862917598426315e-05, "loss": 1.1177, "step": 1899 }, { "epoch": 1.4380321665089877, "grad_norm": 2.36860990524292, "learning_rate": 1.0854919401511002e-05, "loss": 1.1612, "step": 1900 }, { "epoch": 1.4387890255439924, "grad_norm": 2.2473835945129395, "learning_rate": 1.0846920654172264e-05, "loss": 1.1123, "step": 1901 }, { "epoch": 1.4395458845789972, "grad_norm": 2.1833431720733643, "learning_rate": 1.0838921361565978e-05, "loss": 1.133, "step": 1902 }, { "epoch": 1.440302743614002, "grad_norm": 2.078742742538452, "learning_rate": 1.0830921528848355e-05, "loss": 1.1634, "step": 1903 }, { "epoch": 1.4410596026490066, "grad_norm": 2.1691906452178955, "learning_rate": 1.0822921161175974e-05, "loss": 1.1557, "step": 1904 }, { "epoch": 1.4418164616840112, "grad_norm": 2.200441598892212, "learning_rate": 1.0814920263705746e-05, "loss": 1.1438, "step": 1905 }, { "epoch": 1.4425733207190161, "grad_norm": 2.1800084114074707, "learning_rate": 1.0806918841594929e-05, "loss": 1.1395, "step": 1906 }, { "epoch": 1.4433301797540208, "grad_norm": 2.34407901763916, "learning_rate": 1.0798916900001117e-05, "loss": 1.1448, "step": 1907 }, { "epoch": 1.4440870387890254, "grad_norm": 2.1149091720581055, "learning_rate": 1.0790914444082244e-05, "loss": 1.1664, "step": 1908 }, { "epoch": 1.4448438978240303, "grad_norm": 2.3421874046325684, "learning_rate": 1.0782911478996559e-05, "loss": 1.1109, "step": 1909 }, { "epoch": 1.445600756859035, "grad_norm": 2.139888286590576, "learning_rate": 1.0774908009902663e-05, "loss": 1.1852, "step": 1910 }, { "epoch": 1.4463576158940397, "grad_norm": 2.1743266582489014, "learning_rate": 1.0766904041959465e-05, "loss": 1.0994, "step": 1911 }, { "epoch": 1.4471144749290445, "grad_norm": 2.0239481925964355, "learning_rate": 1.0758899580326203e-05, "loss": 1.0712, "step": 1912 }, { "epoch": 1.4478713339640492, "grad_norm": 2.5557572841644287, "learning_rate": 1.0750894630162429e-05, "loss": 1.0855, "step": 1913 }, { "epoch": 1.4486281929990539, "grad_norm": 2.1770548820495605, "learning_rate": 1.0742889196628014e-05, "loss": 1.1541, "step": 1914 }, { "epoch": 1.4493850520340588, "grad_norm": 2.065044641494751, "learning_rate": 1.073488328488314e-05, "loss": 1.0722, "step": 1915 }, { "epoch": 1.4501419110690634, "grad_norm": 2.274731159210205, "learning_rate": 1.0726876900088287e-05, "loss": 1.1562, "step": 1916 }, { "epoch": 1.450898770104068, "grad_norm": 2.2915658950805664, "learning_rate": 1.0718870047404253e-05, "loss": 1.1573, "step": 1917 }, { "epoch": 1.451655629139073, "grad_norm": 2.391997814178467, "learning_rate": 1.0710862731992138e-05, "loss": 1.1434, "step": 1918 }, { "epoch": 1.4524124881740776, "grad_norm": 2.3179776668548584, "learning_rate": 1.0702854959013332e-05, "loss": 1.1352, "step": 1919 }, { "epoch": 1.4531693472090823, "grad_norm": 2.223360776901245, "learning_rate": 1.0694846733629519e-05, "loss": 1.1152, "step": 1920 }, { "epoch": 1.453926206244087, "grad_norm": 2.222038984298706, "learning_rate": 1.0686838061002684e-05, "loss": 1.0796, "step": 1921 }, { "epoch": 1.4546830652790916, "grad_norm": 2.1372921466827393, "learning_rate": 1.0678828946295099e-05, "loss": 1.1047, "step": 1922 }, { "epoch": 1.4554399243140965, "grad_norm": 2.1707942485809326, "learning_rate": 1.0670819394669308e-05, "loss": 1.1509, "step": 1923 }, { "epoch": 1.4561967833491012, "grad_norm": 2.112736463546753, "learning_rate": 1.066280941128815e-05, "loss": 1.1266, "step": 1924 }, { "epoch": 1.4569536423841059, "grad_norm": 2.3490540981292725, "learning_rate": 1.065479900131474e-05, "loss": 1.1621, "step": 1925 }, { "epoch": 1.4577105014191107, "grad_norm": 2.17901873588562, "learning_rate": 1.0646788169912465e-05, "loss": 1.0735, "step": 1926 }, { "epoch": 1.4584673604541154, "grad_norm": 2.0860230922698975, "learning_rate": 1.0638776922244982e-05, "loss": 1.1362, "step": 1927 }, { "epoch": 1.45922421948912, "grad_norm": 2.0391974449157715, "learning_rate": 1.0630765263476221e-05, "loss": 1.1316, "step": 1928 }, { "epoch": 1.459981078524125, "grad_norm": 2.0687365531921387, "learning_rate": 1.062275319877038e-05, "loss": 1.128, "step": 1929 }, { "epoch": 1.4607379375591296, "grad_norm": 2.0217580795288086, "learning_rate": 1.0614740733291902e-05, "loss": 1.1377, "step": 1930 }, { "epoch": 1.4614947965941343, "grad_norm": 2.0296125411987305, "learning_rate": 1.060672787220551e-05, "loss": 1.1236, "step": 1931 }, { "epoch": 1.4622516556291392, "grad_norm": 2.0273191928863525, "learning_rate": 1.0598714620676171e-05, "loss": 1.1271, "step": 1932 }, { "epoch": 1.4630085146641438, "grad_norm": 2.011613130569458, "learning_rate": 1.05907009838691e-05, "loss": 1.1457, "step": 1933 }, { "epoch": 1.4637653736991485, "grad_norm": 2.0048105716705322, "learning_rate": 1.058268696694977e-05, "loss": 1.1499, "step": 1934 }, { "epoch": 1.4645222327341534, "grad_norm": 2.086610794067383, "learning_rate": 1.0574672575083891e-05, "loss": 1.1376, "step": 1935 }, { "epoch": 1.465279091769158, "grad_norm": 2.2125232219696045, "learning_rate": 1.0566657813437419e-05, "loss": 1.1103, "step": 1936 }, { "epoch": 1.4660359508041627, "grad_norm": 2.172860622406006, "learning_rate": 1.0558642687176548e-05, "loss": 1.1306, "step": 1937 }, { "epoch": 1.4667928098391674, "grad_norm": 2.1361825466156006, "learning_rate": 1.0550627201467702e-05, "loss": 1.0978, "step": 1938 }, { "epoch": 1.4675496688741723, "grad_norm": 2.0148260593414307, "learning_rate": 1.0542611361477548e-05, "loss": 1.0851, "step": 1939 }, { "epoch": 1.468306527909177, "grad_norm": 2.103895664215088, "learning_rate": 1.0534595172372967e-05, "loss": 1.1197, "step": 1940 }, { "epoch": 1.4690633869441816, "grad_norm": 2.1808462142944336, "learning_rate": 1.0526578639321078e-05, "loss": 1.1192, "step": 1941 }, { "epoch": 1.4698202459791863, "grad_norm": 2.2360849380493164, "learning_rate": 1.0518561767489211e-05, "loss": 1.1142, "step": 1942 }, { "epoch": 1.4705771050141911, "grad_norm": 2.243360996246338, "learning_rate": 1.0510544562044925e-05, "loss": 1.2133, "step": 1943 }, { "epoch": 1.4713339640491958, "grad_norm": 2.07759690284729, "learning_rate": 1.050252702815598e-05, "loss": 1.1227, "step": 1944 }, { "epoch": 1.4720908230842005, "grad_norm": 2.0380797386169434, "learning_rate": 1.0494509170990362e-05, "loss": 1.0894, "step": 1945 }, { "epoch": 1.4728476821192054, "grad_norm": 2.184549570083618, "learning_rate": 1.0486490995716264e-05, "loss": 1.1314, "step": 1946 }, { "epoch": 1.47360454115421, "grad_norm": 2.1510207653045654, "learning_rate": 1.0478472507502069e-05, "loss": 1.1688, "step": 1947 }, { "epoch": 1.4743614001892147, "grad_norm": 2.1699905395507812, "learning_rate": 1.0470453711516377e-05, "loss": 1.1374, "step": 1948 }, { "epoch": 1.4751182592242196, "grad_norm": 2.0163750648498535, "learning_rate": 1.0462434612927984e-05, "loss": 1.1469, "step": 1949 }, { "epoch": 1.4758751182592242, "grad_norm": 2.176668882369995, "learning_rate": 1.0454415216905875e-05, "loss": 1.154, "step": 1950 }, { "epoch": 1.476631977294229, "grad_norm": 2.18507981300354, "learning_rate": 1.0446395528619236e-05, "loss": 1.1175, "step": 1951 }, { "epoch": 1.4773888363292338, "grad_norm": 2.033001661300659, "learning_rate": 1.0438375553237428e-05, "loss": 1.1129, "step": 1952 }, { "epoch": 1.4781456953642385, "grad_norm": 2.0419886112213135, "learning_rate": 1.0430355295930008e-05, "loss": 1.1455, "step": 1953 }, { "epoch": 1.4789025543992431, "grad_norm": 2.083308696746826, "learning_rate": 1.0422334761866715e-05, "loss": 1.1069, "step": 1954 }, { "epoch": 1.479659413434248, "grad_norm": 2.0463309288024902, "learning_rate": 1.0414313956217456e-05, "loss": 1.1456, "step": 1955 }, { "epoch": 1.4804162724692527, "grad_norm": 2.0065863132476807, "learning_rate": 1.0406292884152327e-05, "loss": 1.0829, "step": 1956 }, { "epoch": 1.4811731315042573, "grad_norm": 1.8798035383224487, "learning_rate": 1.0398271550841586e-05, "loss": 1.1378, "step": 1957 }, { "epoch": 1.481929990539262, "grad_norm": 2.483062267303467, "learning_rate": 1.0390249961455658e-05, "loss": 1.0775, "step": 1958 }, { "epoch": 1.4826868495742667, "grad_norm": 1.995613694190979, "learning_rate": 1.0382228121165146e-05, "loss": 1.1936, "step": 1959 }, { "epoch": 1.4834437086092715, "grad_norm": 2.1545281410217285, "learning_rate": 1.03742060351408e-05, "loss": 1.1802, "step": 1960 }, { "epoch": 1.4842005676442762, "grad_norm": 2.1138501167297363, "learning_rate": 1.0366183708553532e-05, "loss": 1.102, "step": 1961 }, { "epoch": 1.4849574266792809, "grad_norm": 2.1736159324645996, "learning_rate": 1.0358161146574417e-05, "loss": 1.1844, "step": 1962 }, { "epoch": 1.4857142857142858, "grad_norm": 2.0476620197296143, "learning_rate": 1.0350138354374675e-05, "loss": 1.1117, "step": 1963 }, { "epoch": 1.4864711447492904, "grad_norm": 2.070690631866455, "learning_rate": 1.034211533712567e-05, "loss": 1.0858, "step": 1964 }, { "epoch": 1.487228003784295, "grad_norm": 2.256793975830078, "learning_rate": 1.0334092099998926e-05, "loss": 1.1564, "step": 1965 }, { "epoch": 1.4879848628193, "grad_norm": 1.8769042491912842, "learning_rate": 1.0326068648166088e-05, "loss": 1.1211, "step": 1966 }, { "epoch": 1.4887417218543046, "grad_norm": 2.271409749984741, "learning_rate": 1.0318044986798961e-05, "loss": 1.1329, "step": 1967 }, { "epoch": 1.4894985808893093, "grad_norm": 2.035731315612793, "learning_rate": 1.031002112106947e-05, "loss": 1.0566, "step": 1968 }, { "epoch": 1.4902554399243142, "grad_norm": 1.9846116304397583, "learning_rate": 1.0301997056149678e-05, "loss": 1.1373, "step": 1969 }, { "epoch": 1.4910122989593189, "grad_norm": 2.304295301437378, "learning_rate": 1.0293972797211774e-05, "loss": 1.1098, "step": 1970 }, { "epoch": 1.4917691579943235, "grad_norm": 2.189412832260132, "learning_rate": 1.028594834942807e-05, "loss": 1.1026, "step": 1971 }, { "epoch": 1.4925260170293284, "grad_norm": 2.1527864933013916, "learning_rate": 1.0277923717971006e-05, "loss": 1.1262, "step": 1972 }, { "epoch": 1.493282876064333, "grad_norm": 2.20159912109375, "learning_rate": 1.026989890801314e-05, "loss": 1.092, "step": 1973 }, { "epoch": 1.4940397350993377, "grad_norm": 2.2014966011047363, "learning_rate": 1.0261873924727138e-05, "loss": 1.1267, "step": 1974 }, { "epoch": 1.4947965941343424, "grad_norm": 2.194817304611206, "learning_rate": 1.0253848773285778e-05, "loss": 1.1565, "step": 1975 }, { "epoch": 1.4955534531693473, "grad_norm": 2.061915636062622, "learning_rate": 1.0245823458861958e-05, "loss": 1.1291, "step": 1976 }, { "epoch": 1.496310312204352, "grad_norm": 2.1642725467681885, "learning_rate": 1.0237797986628672e-05, "loss": 1.1161, "step": 1977 }, { "epoch": 1.4970671712393566, "grad_norm": 2.0526773929595947, "learning_rate": 1.022977236175901e-05, "loss": 1.1583, "step": 1978 }, { "epoch": 1.4978240302743613, "grad_norm": 2.0349247455596924, "learning_rate": 1.0221746589426176e-05, "loss": 1.1161, "step": 1979 }, { "epoch": 1.4985808893093662, "grad_norm": 2.1763689517974854, "learning_rate": 1.0213720674803458e-05, "loss": 1.1344, "step": 1980 }, { "epoch": 1.4993377483443708, "grad_norm": 2.139963150024414, "learning_rate": 1.0205694623064236e-05, "loss": 1.2086, "step": 1981 }, { "epoch": 1.5000946073793755, "grad_norm": 2.633737564086914, "learning_rate": 1.0197668439381978e-05, "loss": 1.1523, "step": 1982 }, { "epoch": 1.5008514664143804, "grad_norm": 2.0594277381896973, "learning_rate": 1.0189642128930246e-05, "loss": 1.1436, "step": 1983 }, { "epoch": 1.501608325449385, "grad_norm": 2.1511809825897217, "learning_rate": 1.0181615696882676e-05, "loss": 1.1195, "step": 1984 }, { "epoch": 1.5023651844843897, "grad_norm": 1.992146372795105, "learning_rate": 1.0173589148412981e-05, "loss": 1.1534, "step": 1985 }, { "epoch": 1.5031220435193946, "grad_norm": 2.000650644302368, "learning_rate": 1.0165562488694953e-05, "loss": 1.1158, "step": 1986 }, { "epoch": 1.5038789025543993, "grad_norm": 2.0944910049438477, "learning_rate": 1.0157535722902456e-05, "loss": 1.0991, "step": 1987 }, { "epoch": 1.504635761589404, "grad_norm": 2.3380539417266846, "learning_rate": 1.0149508856209416e-05, "loss": 1.148, "step": 1988 }, { "epoch": 1.5053926206244088, "grad_norm": 2.1337814331054688, "learning_rate": 1.014148189378983e-05, "loss": 1.1508, "step": 1989 }, { "epoch": 1.5061494796594135, "grad_norm": 2.069946765899658, "learning_rate": 1.0133454840817765e-05, "loss": 1.1449, "step": 1990 }, { "epoch": 1.5069063386944181, "grad_norm": 2.4194324016571045, "learning_rate": 1.0125427702467327e-05, "loss": 1.1833, "step": 1991 }, { "epoch": 1.507663197729423, "grad_norm": 2.0037777423858643, "learning_rate": 1.0117400483912687e-05, "loss": 1.1053, "step": 1992 }, { "epoch": 1.5084200567644275, "grad_norm": 1.9638372659683228, "learning_rate": 1.010937319032807e-05, "loss": 1.1249, "step": 1993 }, { "epoch": 1.5091769157994324, "grad_norm": 2.185102939605713, "learning_rate": 1.0101345826887752e-05, "loss": 1.1369, "step": 1994 }, { "epoch": 1.5099337748344372, "grad_norm": 2.193578004837036, "learning_rate": 1.0093318398766042e-05, "loss": 1.1268, "step": 1995 }, { "epoch": 1.5106906338694417, "grad_norm": 2.1746068000793457, "learning_rate": 1.0085290911137298e-05, "loss": 1.1316, "step": 1996 }, { "epoch": 1.5114474929044466, "grad_norm": 2.308969736099243, "learning_rate": 1.0077263369175918e-05, "loss": 1.11, "step": 1997 }, { "epoch": 1.5122043519394512, "grad_norm": 2.2050511837005615, "learning_rate": 1.0069235778056336e-05, "loss": 1.1363, "step": 1998 }, { "epoch": 1.512961210974456, "grad_norm": 2.351792812347412, "learning_rate": 1.0061208142953012e-05, "loss": 1.1222, "step": 1999 }, { "epoch": 1.5137180700094608, "grad_norm": 2.144644021987915, "learning_rate": 1.0053180469040433e-05, "loss": 1.0997, "step": 2000 }, { "epoch": 1.5144749290444655, "grad_norm": 2.1637988090515137, "learning_rate": 1.0045152761493127e-05, "loss": 1.0968, "step": 2001 }, { "epoch": 1.5152317880794701, "grad_norm": 2.200721502304077, "learning_rate": 1.0037125025485616e-05, "loss": 1.1016, "step": 2002 }, { "epoch": 1.515988647114475, "grad_norm": 2.3035366535186768, "learning_rate": 1.0029097266192467e-05, "loss": 1.1659, "step": 2003 }, { "epoch": 1.5167455061494797, "grad_norm": 2.34773588180542, "learning_rate": 1.0021069488788253e-05, "loss": 1.0888, "step": 2004 }, { "epoch": 1.5175023651844843, "grad_norm": 2.268134117126465, "learning_rate": 1.0013041698447547e-05, "loss": 1.1519, "step": 2005 }, { "epoch": 1.5182592242194892, "grad_norm": 2.331434726715088, "learning_rate": 1.000501390034495e-05, "loss": 1.1335, "step": 2006 }, { "epoch": 1.5190160832544939, "grad_norm": 2.3400261402130127, "learning_rate": 9.996986099655052e-06, "loss": 1.1808, "step": 2007 }, { "epoch": 1.5197729422894986, "grad_norm": 2.348576068878174, "learning_rate": 9.988958301552454e-06, "loss": 1.1358, "step": 2008 }, { "epoch": 1.5205298013245034, "grad_norm": 2.131770610809326, "learning_rate": 9.980930511211751e-06, "loss": 1.0952, "step": 2009 }, { "epoch": 1.5212866603595079, "grad_norm": 2.3337466716766357, "learning_rate": 9.972902733807532e-06, "loss": 1.1449, "step": 2010 }, { "epoch": 1.5220435193945128, "grad_norm": 2.0936343669891357, "learning_rate": 9.964874974514386e-06, "loss": 1.1176, "step": 2011 }, { "epoch": 1.5228003784295177, "grad_norm": 2.0963313579559326, "learning_rate": 9.95684723850688e-06, "loss": 1.1481, "step": 2012 }, { "epoch": 1.523557237464522, "grad_norm": 2.055452823638916, "learning_rate": 9.948819530959566e-06, "loss": 1.1008, "step": 2013 }, { "epoch": 1.524314096499527, "grad_norm": 2.257266044616699, "learning_rate": 9.94079185704699e-06, "loss": 1.1544, "step": 2014 }, { "epoch": 1.5250709555345316, "grad_norm": 2.06075119972229, "learning_rate": 9.932764221943666e-06, "loss": 1.1153, "step": 2015 }, { "epoch": 1.5258278145695363, "grad_norm": 2.2544174194335938, "learning_rate": 9.924736630824083e-06, "loss": 1.0718, "step": 2016 }, { "epoch": 1.5265846736045412, "grad_norm": 2.1009559631347656, "learning_rate": 9.916709088862707e-06, "loss": 1.137, "step": 2017 }, { "epoch": 1.5273415326395459, "grad_norm": 2.014848470687866, "learning_rate": 9.908681601233964e-06, "loss": 1.1268, "step": 2018 }, { "epoch": 1.5280983916745505, "grad_norm": 2.2673892974853516, "learning_rate": 9.900654173112251e-06, "loss": 1.1528, "step": 2019 }, { "epoch": 1.5288552507095554, "grad_norm": 2.206071138381958, "learning_rate": 9.89262680967193e-06, "loss": 1.1013, "step": 2020 }, { "epoch": 1.52961210974456, "grad_norm": 2.102032423019409, "learning_rate": 9.884599516087314e-06, "loss": 1.105, "step": 2021 }, { "epoch": 1.5303689687795647, "grad_norm": 2.1362051963806152, "learning_rate": 9.876572297532677e-06, "loss": 1.1479, "step": 2022 }, { "epoch": 1.5311258278145696, "grad_norm": 2.0666024684906006, "learning_rate": 9.868545159182238e-06, "loss": 1.1257, "step": 2023 }, { "epoch": 1.5318826868495743, "grad_norm": 1.9618515968322754, "learning_rate": 9.860518106210167e-06, "loss": 1.0649, "step": 2024 }, { "epoch": 1.532639545884579, "grad_norm": 2.202753782272339, "learning_rate": 9.852491143790587e-06, "loss": 1.1016, "step": 2025 }, { "epoch": 1.5333964049195838, "grad_norm": 1.9656624794006348, "learning_rate": 9.844464277097549e-06, "loss": 1.1435, "step": 2026 }, { "epoch": 1.5341532639545885, "grad_norm": 2.06479811668396, "learning_rate": 9.83643751130505e-06, "loss": 1.1393, "step": 2027 }, { "epoch": 1.5349101229895932, "grad_norm": 2.1722230911254883, "learning_rate": 9.828410851587023e-06, "loss": 1.1587, "step": 2028 }, { "epoch": 1.535666982024598, "grad_norm": 2.090200901031494, "learning_rate": 9.820384303117328e-06, "loss": 1.109, "step": 2029 }, { "epoch": 1.5364238410596025, "grad_norm": 1.9950278997421265, "learning_rate": 9.812357871069754e-06, "loss": 1.0648, "step": 2030 }, { "epoch": 1.5371807000946074, "grad_norm": 2.141153573989868, "learning_rate": 9.804331560618023e-06, "loss": 1.1327, "step": 2031 }, { "epoch": 1.5379375591296123, "grad_norm": 1.9659839868545532, "learning_rate": 9.79630537693577e-06, "loss": 1.1194, "step": 2032 }, { "epoch": 1.5386944181646167, "grad_norm": 2.187727212905884, "learning_rate": 9.788279325196547e-06, "loss": 1.1225, "step": 2033 }, { "epoch": 1.5394512771996216, "grad_norm": 2.4570298194885254, "learning_rate": 9.780253410573827e-06, "loss": 1.1252, "step": 2034 }, { "epoch": 1.5402081362346263, "grad_norm": 2.2557146549224854, "learning_rate": 9.772227638240993e-06, "loss": 1.1698, "step": 2035 }, { "epoch": 1.540964995269631, "grad_norm": 2.384152889251709, "learning_rate": 9.764202013371333e-06, "loss": 1.1447, "step": 2036 }, { "epoch": 1.5417218543046358, "grad_norm": 2.0934481620788574, "learning_rate": 9.756176541138045e-06, "loss": 1.1429, "step": 2037 }, { "epoch": 1.5424787133396405, "grad_norm": 2.418853282928467, "learning_rate": 9.748151226714222e-06, "loss": 1.1321, "step": 2038 }, { "epoch": 1.5432355723746451, "grad_norm": 2.0321297645568848, "learning_rate": 9.740126075272868e-06, "loss": 1.0983, "step": 2039 }, { "epoch": 1.54399243140965, "grad_norm": 2.088118076324463, "learning_rate": 9.732101091986864e-06, "loss": 1.1566, "step": 2040 }, { "epoch": 1.5447492904446547, "grad_norm": 2.135477066040039, "learning_rate": 9.724076282028993e-06, "loss": 1.0886, "step": 2041 }, { "epoch": 1.5455061494796594, "grad_norm": 2.1262335777282715, "learning_rate": 9.716051650571933e-06, "loss": 1.1461, "step": 2042 }, { "epoch": 1.5462630085146643, "grad_norm": 2.2694787979125977, "learning_rate": 9.708027202788229e-06, "loss": 1.1725, "step": 2043 }, { "epoch": 1.547019867549669, "grad_norm": 2.0884077548980713, "learning_rate": 9.700002943850323e-06, "loss": 1.1098, "step": 2044 }, { "epoch": 1.5477767265846736, "grad_norm": 2.5522899627685547, "learning_rate": 9.691978878930532e-06, "loss": 1.0861, "step": 2045 }, { "epoch": 1.5485335856196785, "grad_norm": 2.099339008331299, "learning_rate": 9.68395501320104e-06, "loss": 1.135, "step": 2046 }, { "epoch": 1.549290444654683, "grad_norm": 2.0966038703918457, "learning_rate": 9.675931351833911e-06, "loss": 1.1468, "step": 2047 }, { "epoch": 1.5500473036896878, "grad_norm": 1.972170352935791, "learning_rate": 9.667907900001079e-06, "loss": 1.0958, "step": 2048 }, { "epoch": 1.5508041627246927, "grad_norm": 2.228671073913574, "learning_rate": 9.659884662874332e-06, "loss": 1.1338, "step": 2049 }, { "epoch": 1.5515610217596971, "grad_norm": 1.9483565092086792, "learning_rate": 9.65186164562533e-06, "loss": 1.0757, "step": 2050 }, { "epoch": 1.552317880794702, "grad_norm": 2.0136473178863525, "learning_rate": 9.643838853425586e-06, "loss": 1.1177, "step": 2051 }, { "epoch": 1.5530747398297067, "grad_norm": 2.0868184566497803, "learning_rate": 9.635816291446469e-06, "loss": 1.1814, "step": 2052 }, { "epoch": 1.5538315988647113, "grad_norm": 1.9814927577972412, "learning_rate": 9.627793964859205e-06, "loss": 1.1342, "step": 2053 }, { "epoch": 1.5545884578997162, "grad_norm": 2.0822665691375732, "learning_rate": 9.619771878834858e-06, "loss": 1.0962, "step": 2054 }, { "epoch": 1.555345316934721, "grad_norm": 2.0414929389953613, "learning_rate": 9.611750038544343e-06, "loss": 1.1178, "step": 2055 }, { "epoch": 1.5561021759697256, "grad_norm": 2.134589672088623, "learning_rate": 9.60372844915842e-06, "loss": 1.133, "step": 2056 }, { "epoch": 1.5568590350047304, "grad_norm": 2.3856427669525146, "learning_rate": 9.595707115847676e-06, "loss": 1.1252, "step": 2057 }, { "epoch": 1.557615894039735, "grad_norm": 2.275172710418701, "learning_rate": 9.587686043782545e-06, "loss": 1.1195, "step": 2058 }, { "epoch": 1.5583727530747398, "grad_norm": 2.2985713481903076, "learning_rate": 9.579665238133291e-06, "loss": 1.148, "step": 2059 }, { "epoch": 1.5591296121097447, "grad_norm": 2.0514907836914062, "learning_rate": 9.571644704069995e-06, "loss": 1.1307, "step": 2060 }, { "epoch": 1.5598864711447493, "grad_norm": 2.2182204723358154, "learning_rate": 9.563624446762576e-06, "loss": 1.0913, "step": 2061 }, { "epoch": 1.560643330179754, "grad_norm": 2.628448486328125, "learning_rate": 9.555604471380767e-06, "loss": 1.1385, "step": 2062 }, { "epoch": 1.5614001892147589, "grad_norm": 1.9690390825271606, "learning_rate": 9.547584783094126e-06, "loss": 1.1314, "step": 2063 }, { "epoch": 1.5621570482497635, "grad_norm": 2.0425853729248047, "learning_rate": 9.539565387072019e-06, "loss": 1.1048, "step": 2064 }, { "epoch": 1.5629139072847682, "grad_norm": 2.025308609008789, "learning_rate": 9.531546288483624e-06, "loss": 1.1012, "step": 2065 }, { "epoch": 1.563670766319773, "grad_norm": 2.17830491065979, "learning_rate": 9.523527492497934e-06, "loss": 1.1926, "step": 2066 }, { "epoch": 1.5644276253547775, "grad_norm": 3.244462251663208, "learning_rate": 9.51550900428374e-06, "loss": 1.0953, "step": 2067 }, { "epoch": 1.5651844843897824, "grad_norm": 2.0593700408935547, "learning_rate": 9.507490829009639e-06, "loss": 1.1161, "step": 2068 }, { "epoch": 1.565941343424787, "grad_norm": 2.0308477878570557, "learning_rate": 9.49947297184402e-06, "loss": 1.0959, "step": 2069 }, { "epoch": 1.5666982024597917, "grad_norm": 2.1143085956573486, "learning_rate": 9.491455437955081e-06, "loss": 1.1541, "step": 2070 }, { "epoch": 1.5674550614947966, "grad_norm": 2.3376524448394775, "learning_rate": 9.483438232510792e-06, "loss": 1.1283, "step": 2071 }, { "epoch": 1.5682119205298013, "grad_norm": 2.194188117980957, "learning_rate": 9.475421360678926e-06, "loss": 1.1256, "step": 2072 }, { "epoch": 1.568968779564806, "grad_norm": 2.12689208984375, "learning_rate": 9.467404827627036e-06, "loss": 1.1638, "step": 2073 }, { "epoch": 1.5697256385998108, "grad_norm": 1.9852758646011353, "learning_rate": 9.459388638522455e-06, "loss": 1.1458, "step": 2074 }, { "epoch": 1.5704824976348155, "grad_norm": 1.958489179611206, "learning_rate": 9.4513727985323e-06, "loss": 1.1222, "step": 2075 }, { "epoch": 1.5712393566698202, "grad_norm": 2.1876025199890137, "learning_rate": 9.443357312823454e-06, "loss": 1.1322, "step": 2076 }, { "epoch": 1.571996215704825, "grad_norm": 2.1041505336761475, "learning_rate": 9.435342186562582e-06, "loss": 1.1237, "step": 2077 }, { "epoch": 1.5727530747398297, "grad_norm": 2.247180461883545, "learning_rate": 9.427327424916113e-06, "loss": 1.056, "step": 2078 }, { "epoch": 1.5735099337748344, "grad_norm": 2.184521436691284, "learning_rate": 9.419313033050232e-06, "loss": 1.1022, "step": 2079 }, { "epoch": 1.5742667928098393, "grad_norm": 2.078411340713501, "learning_rate": 9.411299016130902e-06, "loss": 1.1526, "step": 2080 }, { "epoch": 1.575023651844844, "grad_norm": 2.154078245162964, "learning_rate": 9.403285379323833e-06, "loss": 1.2138, "step": 2081 }, { "epoch": 1.5757805108798486, "grad_norm": 2.0813803672790527, "learning_rate": 9.395272127794491e-06, "loss": 1.0913, "step": 2082 }, { "epoch": 1.5765373699148535, "grad_norm": 1.975311517715454, "learning_rate": 9.387259266708104e-06, "loss": 1.1674, "step": 2083 }, { "epoch": 1.577294228949858, "grad_norm": 2.022935152053833, "learning_rate": 9.379246801229626e-06, "loss": 1.0664, "step": 2084 }, { "epoch": 1.5780510879848628, "grad_norm": 2.3060450553894043, "learning_rate": 9.371234736523781e-06, "loss": 1.0884, "step": 2085 }, { "epoch": 1.5788079470198677, "grad_norm": 2.224121570587158, "learning_rate": 9.36322307775502e-06, "loss": 1.1056, "step": 2086 }, { "epoch": 1.5795648060548722, "grad_norm": 2.4794466495513916, "learning_rate": 9.35521183008754e-06, "loss": 1.1189, "step": 2087 }, { "epoch": 1.580321665089877, "grad_norm": 2.0150938034057617, "learning_rate": 9.347200998685261e-06, "loss": 1.1063, "step": 2088 }, { "epoch": 1.5810785241248817, "grad_norm": 2.3067104816436768, "learning_rate": 9.339190588711852e-06, "loss": 1.1081, "step": 2089 }, { "epoch": 1.5818353831598864, "grad_norm": 2.071730613708496, "learning_rate": 9.331180605330695e-06, "loss": 1.1256, "step": 2090 }, { "epoch": 1.5825922421948913, "grad_norm": 2.099440097808838, "learning_rate": 9.323171053704904e-06, "loss": 1.1306, "step": 2091 }, { "epoch": 1.583349101229896, "grad_norm": 2.1519389152526855, "learning_rate": 9.315161938997315e-06, "loss": 1.1495, "step": 2092 }, { "epoch": 1.5841059602649006, "grad_norm": 2.1621830463409424, "learning_rate": 9.30715326637048e-06, "loss": 1.1637, "step": 2093 }, { "epoch": 1.5848628192999055, "grad_norm": 2.2661333084106445, "learning_rate": 9.299145040986674e-06, "loss": 1.1102, "step": 2094 }, { "epoch": 1.5856196783349101, "grad_norm": 2.2131712436676025, "learning_rate": 9.291137268007863e-06, "loss": 1.1336, "step": 2095 }, { "epoch": 1.5863765373699148, "grad_norm": 2.1026811599731445, "learning_rate": 9.283129952595747e-06, "loss": 1.0903, "step": 2096 }, { "epoch": 1.5871333964049197, "grad_norm": 2.0890021324157715, "learning_rate": 9.275123099911719e-06, "loss": 1.1232, "step": 2097 }, { "epoch": 1.5878902554399243, "grad_norm": 2.1274547576904297, "learning_rate": 9.267116715116866e-06, "loss": 1.1067, "step": 2098 }, { "epoch": 1.588647114474929, "grad_norm": 2.193621873855591, "learning_rate": 9.259110803371987e-06, "loss": 1.1304, "step": 2099 }, { "epoch": 1.589403973509934, "grad_norm": 2.1822807788848877, "learning_rate": 9.251105369837574e-06, "loss": 1.1568, "step": 2100 }, { "epoch": 1.5901608325449383, "grad_norm": 1.883682131767273, "learning_rate": 9.243100419673798e-06, "loss": 1.129, "step": 2101 }, { "epoch": 1.5909176915799432, "grad_norm": 2.0487449169158936, "learning_rate": 9.235095958040535e-06, "loss": 1.1436, "step": 2102 }, { "epoch": 1.5916745506149481, "grad_norm": 2.079259157180786, "learning_rate": 9.22709199009734e-06, "loss": 1.1538, "step": 2103 }, { "epoch": 1.5924314096499526, "grad_norm": 2.1335606575012207, "learning_rate": 9.219088521003444e-06, "loss": 1.1106, "step": 2104 }, { "epoch": 1.5931882686849574, "grad_norm": 3.516350507736206, "learning_rate": 9.211085555917764e-06, "loss": 1.1, "step": 2105 }, { "epoch": 1.593945127719962, "grad_norm": 2.226984739303589, "learning_rate": 9.203083099998885e-06, "loss": 1.149, "step": 2106 }, { "epoch": 1.5947019867549668, "grad_norm": 2.139308452606201, "learning_rate": 9.195081158405074e-06, "loss": 1.1636, "step": 2107 }, { "epoch": 1.5954588457899717, "grad_norm": 2.194244146347046, "learning_rate": 9.187079736294258e-06, "loss": 1.1441, "step": 2108 }, { "epoch": 1.5962157048249763, "grad_norm": 2.349120855331421, "learning_rate": 9.179078838824029e-06, "loss": 1.1093, "step": 2109 }, { "epoch": 1.596972563859981, "grad_norm": 2.0828843116760254, "learning_rate": 9.171078471151646e-06, "loss": 1.1357, "step": 2110 }, { "epoch": 1.5977294228949859, "grad_norm": 2.10848331451416, "learning_rate": 9.163078638434028e-06, "loss": 1.1125, "step": 2111 }, { "epoch": 1.5984862819299905, "grad_norm": 2.1101763248443604, "learning_rate": 9.155079345827737e-06, "loss": 1.0964, "step": 2112 }, { "epoch": 1.5992431409649952, "grad_norm": 2.0837841033935547, "learning_rate": 9.147080598488999e-06, "loss": 1.1121, "step": 2113 }, { "epoch": 1.6, "grad_norm": 1.9847743511199951, "learning_rate": 9.13908240157369e-06, "loss": 1.1153, "step": 2114 }, { "epoch": 1.6007568590350048, "grad_norm": 2.0701873302459717, "learning_rate": 9.131084760237314e-06, "loss": 1.1046, "step": 2115 }, { "epoch": 1.6015137180700094, "grad_norm": 2.1153228282928467, "learning_rate": 9.123087679635039e-06, "loss": 1.1126, "step": 2116 }, { "epoch": 1.6022705771050143, "grad_norm": 2.0515284538269043, "learning_rate": 9.115091164921654e-06, "loss": 1.1266, "step": 2117 }, { "epoch": 1.603027436140019, "grad_norm": 2.08406662940979, "learning_rate": 9.107095221251597e-06, "loss": 1.1355, "step": 2118 }, { "epoch": 1.6037842951750236, "grad_norm": 2.6160190105438232, "learning_rate": 9.099099853778927e-06, "loss": 1.1226, "step": 2119 }, { "epoch": 1.6045411542100285, "grad_norm": 2.024075984954834, "learning_rate": 9.091105067657335e-06, "loss": 1.0951, "step": 2120 }, { "epoch": 1.605298013245033, "grad_norm": 1.9903373718261719, "learning_rate": 9.083110868040142e-06, "loss": 1.1485, "step": 2121 }, { "epoch": 1.6060548722800378, "grad_norm": 2.307220458984375, "learning_rate": 9.075117260080286e-06, "loss": 1.1698, "step": 2122 }, { "epoch": 1.6068117313150427, "grad_norm": 2.160867929458618, "learning_rate": 9.067124248930324e-06, "loss": 1.0852, "step": 2123 }, { "epoch": 1.6075685903500472, "grad_norm": 2.255039691925049, "learning_rate": 9.059131839742425e-06, "loss": 1.1299, "step": 2124 }, { "epoch": 1.608325449385052, "grad_norm": 2.122530698776245, "learning_rate": 9.051140037668385e-06, "loss": 1.1148, "step": 2125 }, { "epoch": 1.6090823084200567, "grad_norm": 2.067059278488159, "learning_rate": 9.043148847859588e-06, "loss": 1.1335, "step": 2126 }, { "epoch": 1.6098391674550614, "grad_norm": 3.3186850547790527, "learning_rate": 9.035158275467037e-06, "loss": 1.0892, "step": 2127 }, { "epoch": 1.6105960264900663, "grad_norm": 2.4546923637390137, "learning_rate": 9.02716832564133e-06, "loss": 1.1939, "step": 2128 }, { "epoch": 1.611352885525071, "grad_norm": 2.36734938621521, "learning_rate": 9.01917900353267e-06, "loss": 1.163, "step": 2129 }, { "epoch": 1.6121097445600756, "grad_norm": 2.2100653648376465, "learning_rate": 9.011190314290852e-06, "loss": 1.0951, "step": 2130 }, { "epoch": 1.6128666035950805, "grad_norm": 2.239097833633423, "learning_rate": 9.003202263065263e-06, "loss": 1.1554, "step": 2131 }, { "epoch": 1.6136234626300852, "grad_norm": 2.2774319648742676, "learning_rate": 8.995214855004877e-06, "loss": 1.1237, "step": 2132 }, { "epoch": 1.6143803216650898, "grad_norm": 2.1328752040863037, "learning_rate": 8.987228095258256e-06, "loss": 1.1154, "step": 2133 }, { "epoch": 1.6151371807000947, "grad_norm": 2.3373916149139404, "learning_rate": 8.979241988973546e-06, "loss": 1.1058, "step": 2134 }, { "epoch": 1.6158940397350994, "grad_norm": 2.126988172531128, "learning_rate": 8.971256541298468e-06, "loss": 1.1709, "step": 2135 }, { "epoch": 1.616650898770104, "grad_norm": 2.3820157051086426, "learning_rate": 8.963271757380319e-06, "loss": 1.1332, "step": 2136 }, { "epoch": 1.617407757805109, "grad_norm": 2.0995140075683594, "learning_rate": 8.955287642365969e-06, "loss": 1.1341, "step": 2137 }, { "epoch": 1.6181646168401134, "grad_norm": 2.2463080883026123, "learning_rate": 8.94730420140186e-06, "loss": 1.1455, "step": 2138 }, { "epoch": 1.6189214758751183, "grad_norm": 2.293729066848755, "learning_rate": 8.939321439633991e-06, "loss": 1.103, "step": 2139 }, { "epoch": 1.6196783349101231, "grad_norm": 2.1475143432617188, "learning_rate": 8.931339362207931e-06, "loss": 1.124, "step": 2140 }, { "epoch": 1.6204351939451276, "grad_norm": 2.087843179702759, "learning_rate": 8.923357974268806e-06, "loss": 1.1743, "step": 2141 }, { "epoch": 1.6211920529801325, "grad_norm": 2.0908894538879395, "learning_rate": 8.915377280961298e-06, "loss": 1.0961, "step": 2142 }, { "epoch": 1.6219489120151371, "grad_norm": 2.312263250350952, "learning_rate": 8.907397287429635e-06, "loss": 1.1523, "step": 2143 }, { "epoch": 1.6227057710501418, "grad_norm": 2.30190110206604, "learning_rate": 8.899417998817605e-06, "loss": 1.1319, "step": 2144 }, { "epoch": 1.6234626300851467, "grad_norm": 2.0427803993225098, "learning_rate": 8.891439420268534e-06, "loss": 1.1007, "step": 2145 }, { "epoch": 1.6242194891201513, "grad_norm": 2.142066717147827, "learning_rate": 8.88346155692529e-06, "loss": 1.1465, "step": 2146 }, { "epoch": 1.624976348155156, "grad_norm": 1.9997434616088867, "learning_rate": 8.875484413930283e-06, "loss": 1.0966, "step": 2147 }, { "epoch": 1.625733207190161, "grad_norm": 2.0591166019439697, "learning_rate": 8.86750799642546e-06, "loss": 1.1409, "step": 2148 }, { "epoch": 1.6264900662251656, "grad_norm": 2.044402599334717, "learning_rate": 8.859532309552298e-06, "loss": 1.1145, "step": 2149 }, { "epoch": 1.6272469252601702, "grad_norm": 2.3767645359039307, "learning_rate": 8.8515573584518e-06, "loss": 1.1278, "step": 2150 }, { "epoch": 1.6280037842951751, "grad_norm": 2.088170289993286, "learning_rate": 8.843583148264496e-06, "loss": 1.1607, "step": 2151 }, { "epoch": 1.6287606433301798, "grad_norm": 2.026031255722046, "learning_rate": 8.835609684130448e-06, "loss": 1.1173, "step": 2152 }, { "epoch": 1.6295175023651844, "grad_norm": 2.108065366744995, "learning_rate": 8.827636971189222e-06, "loss": 1.1735, "step": 2153 }, { "epoch": 1.6302743614001893, "grad_norm": 2.12156081199646, "learning_rate": 8.819665014579911e-06, "loss": 1.0851, "step": 2154 }, { "epoch": 1.631031220435194, "grad_norm": 2.0895984172821045, "learning_rate": 8.81169381944112e-06, "loss": 1.1261, "step": 2155 }, { "epoch": 1.6317880794701987, "grad_norm": 2.119001865386963, "learning_rate": 8.803723390910951e-06, "loss": 1.1236, "step": 2156 }, { "epoch": 1.6325449385052035, "grad_norm": 2.0929312705993652, "learning_rate": 8.795753734127024e-06, "loss": 1.157, "step": 2157 }, { "epoch": 1.633301797540208, "grad_norm": 2.0275888442993164, "learning_rate": 8.787784854226465e-06, "loss": 1.1407, "step": 2158 }, { "epoch": 1.6340586565752129, "grad_norm": 2.332402467727661, "learning_rate": 8.779816756345884e-06, "loss": 1.0916, "step": 2159 }, { "epoch": 1.6348155156102178, "grad_norm": 2.0872373580932617, "learning_rate": 8.7718494456214e-06, "loss": 1.1492, "step": 2160 }, { "epoch": 1.6355723746452222, "grad_norm": 2.1566085815429688, "learning_rate": 8.763882927188615e-06, "loss": 1.1397, "step": 2161 }, { "epoch": 1.636329233680227, "grad_norm": 2.134572744369507, "learning_rate": 8.75591720618263e-06, "loss": 1.0967, "step": 2162 }, { "epoch": 1.6370860927152318, "grad_norm": 2.061708450317383, "learning_rate": 8.74795228773803e-06, "loss": 1.0709, "step": 2163 }, { "epoch": 1.6378429517502364, "grad_norm": 2.04203724861145, "learning_rate": 8.739988176988869e-06, "loss": 1.0671, "step": 2164 }, { "epoch": 1.6385998107852413, "grad_norm": 2.0386204719543457, "learning_rate": 8.732024879068702e-06, "loss": 1.1021, "step": 2165 }, { "epoch": 1.639356669820246, "grad_norm": 2.104109764099121, "learning_rate": 8.724062399110547e-06, "loss": 1.0964, "step": 2166 }, { "epoch": 1.6401135288552506, "grad_norm": 2.078735113143921, "learning_rate": 8.716100742246894e-06, "loss": 1.1241, "step": 2167 }, { "epoch": 1.6408703878902555, "grad_norm": 2.1530871391296387, "learning_rate": 8.708139913609705e-06, "loss": 1.118, "step": 2168 }, { "epoch": 1.6416272469252602, "grad_norm": 2.1105563640594482, "learning_rate": 8.700179918330419e-06, "loss": 1.0883, "step": 2169 }, { "epoch": 1.6423841059602649, "grad_norm": 2.056195020675659, "learning_rate": 8.692220761539912e-06, "loss": 1.1549, "step": 2170 }, { "epoch": 1.6431409649952697, "grad_norm": 2.327533006668091, "learning_rate": 8.684262448368546e-06, "loss": 1.1097, "step": 2171 }, { "epoch": 1.6438978240302744, "grad_norm": 2.111985206604004, "learning_rate": 8.676304983946122e-06, "loss": 1.1048, "step": 2172 }, { "epoch": 1.644654683065279, "grad_norm": 2.1778697967529297, "learning_rate": 8.668348373401908e-06, "loss": 1.1644, "step": 2173 }, { "epoch": 1.645411542100284, "grad_norm": 2.3045222759246826, "learning_rate": 8.660392621864608e-06, "loss": 1.1873, "step": 2174 }, { "epoch": 1.6461684011352884, "grad_norm": 2.2667534351348877, "learning_rate": 8.652437734462377e-06, "loss": 1.0519, "step": 2175 }, { "epoch": 1.6469252601702933, "grad_norm": 2.394404888153076, "learning_rate": 8.644483716322818e-06, "loss": 1.1324, "step": 2176 }, { "epoch": 1.6476821192052982, "grad_norm": 2.009328603744507, "learning_rate": 8.63653057257297e-06, "loss": 1.147, "step": 2177 }, { "epoch": 1.6484389782403026, "grad_norm": 2.072662591934204, "learning_rate": 8.6285783083393e-06, "loss": 1.1435, "step": 2178 }, { "epoch": 1.6491958372753075, "grad_norm": 2.184267044067383, "learning_rate": 8.620626928747725e-06, "loss": 1.1896, "step": 2179 }, { "epoch": 1.6499526963103122, "grad_norm": 2.0765388011932373, "learning_rate": 8.612676438923587e-06, "loss": 1.125, "step": 2180 }, { "epoch": 1.6507095553453168, "grad_norm": 2.2719030380249023, "learning_rate": 8.604726843991637e-06, "loss": 1.074, "step": 2181 }, { "epoch": 1.6514664143803217, "grad_norm": 2.0793111324310303, "learning_rate": 8.596778149076073e-06, "loss": 1.099, "step": 2182 }, { "epoch": 1.6522232734153264, "grad_norm": 1.9967988729476929, "learning_rate": 8.588830359300499e-06, "loss": 1.1454, "step": 2183 }, { "epoch": 1.652980132450331, "grad_norm": 2.3746399879455566, "learning_rate": 8.580883479787936e-06, "loss": 1.1424, "step": 2184 }, { "epoch": 1.653736991485336, "grad_norm": 2.0627639293670654, "learning_rate": 8.57293751566083e-06, "loss": 1.1052, "step": 2185 }, { "epoch": 1.6544938505203406, "grad_norm": 2.196162700653076, "learning_rate": 8.564992472041021e-06, "loss": 1.1002, "step": 2186 }, { "epoch": 1.6552507095553453, "grad_norm": 2.3567469120025635, "learning_rate": 8.557048354049763e-06, "loss": 1.1426, "step": 2187 }, { "epoch": 1.6560075685903501, "grad_norm": 2.0467660427093506, "learning_rate": 8.549105166807716e-06, "loss": 1.0916, "step": 2188 }, { "epoch": 1.6567644276253548, "grad_norm": 2.107483386993408, "learning_rate": 8.541162915434935e-06, "loss": 1.1226, "step": 2189 }, { "epoch": 1.6575212866603595, "grad_norm": 2.2864937782287598, "learning_rate": 8.533221605050878e-06, "loss": 1.1246, "step": 2190 }, { "epoch": 1.6582781456953644, "grad_norm": 2.135864734649658, "learning_rate": 8.525281240774391e-06, "loss": 1.0364, "step": 2191 }, { "epoch": 1.659035004730369, "grad_norm": 2.136951446533203, "learning_rate": 8.517341827723709e-06, "loss": 1.1753, "step": 2192 }, { "epoch": 1.6597918637653737, "grad_norm": 2.1684775352478027, "learning_rate": 8.509403371016462e-06, "loss": 1.0812, "step": 2193 }, { "epoch": 1.6605487228003786, "grad_norm": 2.195051431655884, "learning_rate": 8.501465875769652e-06, "loss": 1.0883, "step": 2194 }, { "epoch": 1.661305581835383, "grad_norm": 2.2236487865448, "learning_rate": 8.493529347099669e-06, "loss": 1.0637, "step": 2195 }, { "epoch": 1.662062440870388, "grad_norm": 2.1652839183807373, "learning_rate": 8.48559379012228e-06, "loss": 1.1285, "step": 2196 }, { "epoch": 1.6628192999053926, "grad_norm": 2.3312926292419434, "learning_rate": 8.477659209952627e-06, "loss": 1.1303, "step": 2197 }, { "epoch": 1.6635761589403972, "grad_norm": 2.1476340293884277, "learning_rate": 8.46972561170521e-06, "loss": 1.0911, "step": 2198 }, { "epoch": 1.6643330179754021, "grad_norm": 2.205261707305908, "learning_rate": 8.461793000493917e-06, "loss": 1.1075, "step": 2199 }, { "epoch": 1.6650898770104068, "grad_norm": 2.3584821224212646, "learning_rate": 8.45386138143198e-06, "loss": 1.1429, "step": 2200 }, { "epoch": 1.6658467360454114, "grad_norm": 1.990213394165039, "learning_rate": 8.445930759632e-06, "loss": 1.0731, "step": 2201 }, { "epoch": 1.6666035950804163, "grad_norm": 2.114382266998291, "learning_rate": 8.43800114020594e-06, "loss": 1.1304, "step": 2202 }, { "epoch": 1.667360454115421, "grad_norm": 2.6425230503082275, "learning_rate": 8.430072528265107e-06, "loss": 1.1223, "step": 2203 }, { "epoch": 1.6681173131504257, "grad_norm": 2.238675594329834, "learning_rate": 8.422144928920168e-06, "loss": 1.1187, "step": 2204 }, { "epoch": 1.6688741721854305, "grad_norm": 2.0409348011016846, "learning_rate": 8.414218347281127e-06, "loss": 1.0912, "step": 2205 }, { "epoch": 1.6696310312204352, "grad_norm": 2.5583693981170654, "learning_rate": 8.406292788457338e-06, "loss": 1.1433, "step": 2206 }, { "epoch": 1.6703878902554399, "grad_norm": 2.24996018409729, "learning_rate": 8.398368257557505e-06, "loss": 1.1177, "step": 2207 }, { "epoch": 1.6711447492904448, "grad_norm": 2.2110371589660645, "learning_rate": 8.390444759689646e-06, "loss": 1.1334, "step": 2208 }, { "epoch": 1.6719016083254494, "grad_norm": 2.0102930068969727, "learning_rate": 8.382522299961135e-06, "loss": 1.0807, "step": 2209 }, { "epoch": 1.672658467360454, "grad_norm": 2.319051504135132, "learning_rate": 8.37460088347867e-06, "loss": 1.0997, "step": 2210 }, { "epoch": 1.673415326395459, "grad_norm": 2.3122832775115967, "learning_rate": 8.36668051534827e-06, "loss": 1.1591, "step": 2211 }, { "epoch": 1.6741721854304634, "grad_norm": 2.39446759223938, "learning_rate": 8.358761200675284e-06, "loss": 1.1201, "step": 2212 }, { "epoch": 1.6749290444654683, "grad_norm": 2.257894515991211, "learning_rate": 8.350842944564386e-06, "loss": 1.1094, "step": 2213 }, { "epoch": 1.6756859035004732, "grad_norm": 2.2300925254821777, "learning_rate": 8.34292575211956e-06, "loss": 1.1613, "step": 2214 }, { "epoch": 1.6764427625354776, "grad_norm": 2.2363715171813965, "learning_rate": 8.33500962844411e-06, "loss": 1.1485, "step": 2215 }, { "epoch": 1.6771996215704825, "grad_norm": 2.0160231590270996, "learning_rate": 8.327094578640643e-06, "loss": 1.1136, "step": 2216 }, { "epoch": 1.6779564806054872, "grad_norm": 2.279360294342041, "learning_rate": 8.319180607811085e-06, "loss": 1.1193, "step": 2217 }, { "epoch": 1.6787133396404919, "grad_norm": 2.280641555786133, "learning_rate": 8.31126772105666e-06, "loss": 1.1655, "step": 2218 }, { "epoch": 1.6794701986754967, "grad_norm": 2.077263832092285, "learning_rate": 8.303355923477889e-06, "loss": 1.1435, "step": 2219 }, { "epoch": 1.6802270577105014, "grad_norm": 2.271101236343384, "learning_rate": 8.295445220174604e-06, "loss": 1.0986, "step": 2220 }, { "epoch": 1.680983916745506, "grad_norm": 2.257680892944336, "learning_rate": 8.28753561624592e-06, "loss": 1.095, "step": 2221 }, { "epoch": 1.681740775780511, "grad_norm": 2.18369722366333, "learning_rate": 8.279627116790244e-06, "loss": 1.1007, "step": 2222 }, { "epoch": 1.6824976348155156, "grad_norm": 2.0165445804595947, "learning_rate": 8.271719726905275e-06, "loss": 1.1165, "step": 2223 }, { "epoch": 1.6832544938505203, "grad_norm": 2.2388248443603516, "learning_rate": 8.263813451688006e-06, "loss": 1.1186, "step": 2224 }, { "epoch": 1.6840113528855252, "grad_norm": 2.32745099067688, "learning_rate": 8.255908296234688e-06, "loss": 1.1812, "step": 2225 }, { "epoch": 1.6847682119205298, "grad_norm": 2.361375093460083, "learning_rate": 8.248004265640868e-06, "loss": 1.1649, "step": 2226 }, { "epoch": 1.6855250709555345, "grad_norm": 2.2414417266845703, "learning_rate": 8.240101365001368e-06, "loss": 1.1013, "step": 2227 }, { "epoch": 1.6862819299905394, "grad_norm": 1.9859745502471924, "learning_rate": 8.232199599410273e-06, "loss": 1.1295, "step": 2228 }, { "epoch": 1.6870387890255438, "grad_norm": 2.1757733821868896, "learning_rate": 8.22429897396094e-06, "loss": 1.1223, "step": 2229 }, { "epoch": 1.6877956480605487, "grad_norm": 2.36989164352417, "learning_rate": 8.216399493745992e-06, "loss": 1.1337, "step": 2230 }, { "epoch": 1.6885525070955536, "grad_norm": 1.9508718252182007, "learning_rate": 8.208501163857318e-06, "loss": 1.1351, "step": 2231 }, { "epoch": 1.689309366130558, "grad_norm": 2.057548999786377, "learning_rate": 8.200603989386055e-06, "loss": 1.1382, "step": 2232 }, { "epoch": 1.690066225165563, "grad_norm": 2.3355371952056885, "learning_rate": 8.192707975422604e-06, "loss": 1.1393, "step": 2233 }, { "epoch": 1.6908230842005676, "grad_norm": 2.1525468826293945, "learning_rate": 8.184813127056616e-06, "loss": 1.1665, "step": 2234 }, { "epoch": 1.6915799432355723, "grad_norm": 2.2342827320098877, "learning_rate": 8.176919449376989e-06, "loss": 1.1385, "step": 2235 }, { "epoch": 1.6923368022705771, "grad_norm": 2.1949453353881836, "learning_rate": 8.169026947471866e-06, "loss": 1.1635, "step": 2236 }, { "epoch": 1.6930936613055818, "grad_norm": 2.183218240737915, "learning_rate": 8.161135626428633e-06, "loss": 1.1433, "step": 2237 }, { "epoch": 1.6938505203405865, "grad_norm": 2.1611173152923584, "learning_rate": 8.153245491333922e-06, "loss": 1.1083, "step": 2238 }, { "epoch": 1.6946073793755914, "grad_norm": 2.0343189239501953, "learning_rate": 8.145356547273584e-06, "loss": 1.1334, "step": 2239 }, { "epoch": 1.695364238410596, "grad_norm": 2.0303726196289062, "learning_rate": 8.13746879933272e-06, "loss": 1.1545, "step": 2240 }, { "epoch": 1.6961210974456007, "grad_norm": 2.152381658554077, "learning_rate": 8.129582252595645e-06, "loss": 1.1316, "step": 2241 }, { "epoch": 1.6968779564806056, "grad_norm": 2.0682196617126465, "learning_rate": 8.12169691214591e-06, "loss": 1.1396, "step": 2242 }, { "epoch": 1.6976348155156102, "grad_norm": 2.07098388671875, "learning_rate": 8.113812783066288e-06, "loss": 1.0784, "step": 2243 }, { "epoch": 1.698391674550615, "grad_norm": 2.1533405780792236, "learning_rate": 8.105929870438762e-06, "loss": 1.1151, "step": 2244 }, { "epoch": 1.6991485335856198, "grad_norm": 2.348604679107666, "learning_rate": 8.098048179344545e-06, "loss": 1.0913, "step": 2245 }, { "epoch": 1.6999053926206245, "grad_norm": 2.0196192264556885, "learning_rate": 8.090167714864051e-06, "loss": 1.1334, "step": 2246 }, { "epoch": 1.7006622516556291, "grad_norm": 2.2407851219177246, "learning_rate": 8.082288482076904e-06, "loss": 1.1362, "step": 2247 }, { "epoch": 1.701419110690634, "grad_norm": 2.0429224967956543, "learning_rate": 8.074410486061943e-06, "loss": 1.0377, "step": 2248 }, { "epoch": 1.7021759697256384, "grad_norm": 2.149394989013672, "learning_rate": 8.066533731897202e-06, "loss": 1.1324, "step": 2249 }, { "epoch": 1.7029328287606433, "grad_norm": 2.5104711055755615, "learning_rate": 8.058658224659914e-06, "loss": 1.1037, "step": 2250 }, { "epoch": 1.7036896877956482, "grad_norm": 1.9572622776031494, "learning_rate": 8.050783969426517e-06, "loss": 1.1231, "step": 2251 }, { "epoch": 1.7044465468306527, "grad_norm": 2.344362497329712, "learning_rate": 8.042910971272627e-06, "loss": 1.1054, "step": 2252 }, { "epoch": 1.7052034058656576, "grad_norm": 2.160923719406128, "learning_rate": 8.035039235273063e-06, "loss": 1.1429, "step": 2253 }, { "epoch": 1.7059602649006622, "grad_norm": 2.273373603820801, "learning_rate": 8.027168766501831e-06, "loss": 1.1073, "step": 2254 }, { "epoch": 1.7067171239356669, "grad_norm": 2.043576955795288, "learning_rate": 8.019299570032108e-06, "loss": 1.1326, "step": 2255 }, { "epoch": 1.7074739829706718, "grad_norm": 2.3227038383483887, "learning_rate": 8.011431650936259e-06, "loss": 1.0762, "step": 2256 }, { "epoch": 1.7082308420056764, "grad_norm": 2.2618634700775146, "learning_rate": 8.003565014285829e-06, "loss": 1.1246, "step": 2257 }, { "epoch": 1.708987701040681, "grad_norm": 2.072833776473999, "learning_rate": 7.99569966515153e-06, "loss": 1.128, "step": 2258 }, { "epoch": 1.709744560075686, "grad_norm": 2.305095911026001, "learning_rate": 7.987835608603241e-06, "loss": 1.0533, "step": 2259 }, { "epoch": 1.7105014191106906, "grad_norm": 2.25026535987854, "learning_rate": 7.979972849710022e-06, "loss": 1.1497, "step": 2260 }, { "epoch": 1.7112582781456953, "grad_norm": 2.259713888168335, "learning_rate": 7.972111393540079e-06, "loss": 1.1364, "step": 2261 }, { "epoch": 1.7120151371807002, "grad_norm": 2.3140814304351807, "learning_rate": 7.964251245160795e-06, "loss": 1.1363, "step": 2262 }, { "epoch": 1.7127719962157049, "grad_norm": 2.25529408454895, "learning_rate": 7.956392409638693e-06, "loss": 1.1304, "step": 2263 }, { "epoch": 1.7135288552507095, "grad_norm": 2.118211030960083, "learning_rate": 7.948534892039462e-06, "loss": 1.1227, "step": 2264 }, { "epoch": 1.7142857142857144, "grad_norm": 2.260540008544922, "learning_rate": 7.940678697427945e-06, "loss": 1.0716, "step": 2265 }, { "epoch": 1.7150425733207189, "grad_norm": 2.334322690963745, "learning_rate": 7.932823830868114e-06, "loss": 1.1458, "step": 2266 }, { "epoch": 1.7157994323557237, "grad_norm": 2.1086597442626953, "learning_rate": 7.9249702974231e-06, "loss": 1.1264, "step": 2267 }, { "epoch": 1.7165562913907286, "grad_norm": 2.286928176879883, "learning_rate": 7.917118102155175e-06, "loss": 1.1079, "step": 2268 }, { "epoch": 1.717313150425733, "grad_norm": 2.132174491882324, "learning_rate": 7.909267250125743e-06, "loss": 1.1201, "step": 2269 }, { "epoch": 1.718070009460738, "grad_norm": 2.0687386989593506, "learning_rate": 7.901417746395338e-06, "loss": 1.0981, "step": 2270 }, { "epoch": 1.7188268684957426, "grad_norm": 2.2579548358917236, "learning_rate": 7.893569596023638e-06, "loss": 1.1193, "step": 2271 }, { "epoch": 1.7195837275307473, "grad_norm": 2.1385035514831543, "learning_rate": 7.885722804069435e-06, "loss": 1.1826, "step": 2272 }, { "epoch": 1.7203405865657522, "grad_norm": 1.9475092887878418, "learning_rate": 7.877877375590657e-06, "loss": 1.101, "step": 2273 }, { "epoch": 1.7210974456007568, "grad_norm": 2.2321999073028564, "learning_rate": 7.87003331564434e-06, "loss": 1.121, "step": 2274 }, { "epoch": 1.7218543046357615, "grad_norm": 2.091757297515869, "learning_rate": 7.86219062928665e-06, "loss": 1.0453, "step": 2275 }, { "epoch": 1.7226111636707664, "grad_norm": 1.965161681175232, "learning_rate": 7.854349321572868e-06, "loss": 1.1628, "step": 2276 }, { "epoch": 1.723368022705771, "grad_norm": 2.090461492538452, "learning_rate": 7.846509397557372e-06, "loss": 1.1603, "step": 2277 }, { "epoch": 1.7241248817407757, "grad_norm": 2.0913045406341553, "learning_rate": 7.83867086229366e-06, "loss": 1.186, "step": 2278 }, { "epoch": 1.7248817407757806, "grad_norm": 2.137932300567627, "learning_rate": 7.83083372083434e-06, "loss": 1.129, "step": 2279 }, { "epoch": 1.7256385998107853, "grad_norm": 2.1641454696655273, "learning_rate": 7.822997978231101e-06, "loss": 1.1261, "step": 2280 }, { "epoch": 1.72639545884579, "grad_norm": 1.977129340171814, "learning_rate": 7.815163639534752e-06, "loss": 1.1332, "step": 2281 }, { "epoch": 1.7271523178807948, "grad_norm": 2.3945367336273193, "learning_rate": 7.807330709795191e-06, "loss": 1.0711, "step": 2282 }, { "epoch": 1.7279091769157995, "grad_norm": 1.9358054399490356, "learning_rate": 7.799499194061395e-06, "loss": 1.0986, "step": 2283 }, { "epoch": 1.7286660359508041, "grad_norm": 2.242386817932129, "learning_rate": 7.791669097381447e-06, "loss": 1.1113, "step": 2284 }, { "epoch": 1.729422894985809, "grad_norm": 2.1633381843566895, "learning_rate": 7.783840424802504e-06, "loss": 1.1581, "step": 2285 }, { "epoch": 1.7301797540208135, "grad_norm": 2.0929739475250244, "learning_rate": 7.776013181370813e-06, "loss": 1.1479, "step": 2286 }, { "epoch": 1.7309366130558184, "grad_norm": 2.2305848598480225, "learning_rate": 7.768187372131693e-06, "loss": 1.1683, "step": 2287 }, { "epoch": 1.7316934720908232, "grad_norm": 1.938439130783081, "learning_rate": 7.76036300212954e-06, "loss": 1.0774, "step": 2288 }, { "epoch": 1.7324503311258277, "grad_norm": 2.031890869140625, "learning_rate": 7.752540076407829e-06, "loss": 1.0838, "step": 2289 }, { "epoch": 1.7332071901608326, "grad_norm": 2.242338180541992, "learning_rate": 7.744718600009093e-06, "loss": 1.1161, "step": 2290 }, { "epoch": 1.7339640491958372, "grad_norm": 2.1008989810943604, "learning_rate": 7.736898577974936e-06, "loss": 1.071, "step": 2291 }, { "epoch": 1.734720908230842, "grad_norm": 1.9958288669586182, "learning_rate": 7.72908001534603e-06, "loss": 1.0919, "step": 2292 }, { "epoch": 1.7354777672658468, "grad_norm": 2.2547950744628906, "learning_rate": 7.7212629171621e-06, "loss": 1.1024, "step": 2293 }, { "epoch": 1.7362346263008515, "grad_norm": 2.198230743408203, "learning_rate": 7.713447288461922e-06, "loss": 1.1567, "step": 2294 }, { "epoch": 1.7369914853358561, "grad_norm": 2.047135591506958, "learning_rate": 7.705633134283342e-06, "loss": 1.1345, "step": 2295 }, { "epoch": 1.737748344370861, "grad_norm": 2.021092176437378, "learning_rate": 7.697820459663234e-06, "loss": 1.0968, "step": 2296 }, { "epoch": 1.7385052034058657, "grad_norm": 2.04164719581604, "learning_rate": 7.690009269637535e-06, "loss": 1.1234, "step": 2297 }, { "epoch": 1.7392620624408703, "grad_norm": 2.0042457580566406, "learning_rate": 7.68219956924122e-06, "loss": 1.1361, "step": 2298 }, { "epoch": 1.7400189214758752, "grad_norm": 2.207336902618408, "learning_rate": 7.674391363508293e-06, "loss": 1.1241, "step": 2299 }, { "epoch": 1.7407757805108799, "grad_norm": 2.4397289752960205, "learning_rate": 7.666584657471819e-06, "loss": 1.1499, "step": 2300 }, { "epoch": 1.7415326395458846, "grad_norm": 1.9008210897445679, "learning_rate": 7.65877945616387e-06, "loss": 1.075, "step": 2301 }, { "epoch": 1.7422894985808894, "grad_norm": 2.0731241703033447, "learning_rate": 7.650975764615564e-06, "loss": 1.1104, "step": 2302 }, { "epoch": 1.7430463576158939, "grad_norm": 2.0302274227142334, "learning_rate": 7.643173587857043e-06, "loss": 1.1129, "step": 2303 }, { "epoch": 1.7438032166508988, "grad_norm": 2.3482978343963623, "learning_rate": 7.635372930917471e-06, "loss": 1.1239, "step": 2304 }, { "epoch": 1.7445600756859037, "grad_norm": 2.1537070274353027, "learning_rate": 7.627573798825028e-06, "loss": 1.1213, "step": 2305 }, { "epoch": 1.745316934720908, "grad_norm": 3.2694427967071533, "learning_rate": 7.619776196606923e-06, "loss": 1.1201, "step": 2306 }, { "epoch": 1.746073793755913, "grad_norm": 1.9938985109329224, "learning_rate": 7.611980129289362e-06, "loss": 1.1202, "step": 2307 }, { "epoch": 1.7468306527909176, "grad_norm": 1.9866852760314941, "learning_rate": 7.604185601897578e-06, "loss": 1.1716, "step": 2308 }, { "epoch": 1.7475875118259223, "grad_norm": 2.149052143096924, "learning_rate": 7.596392619455805e-06, "loss": 1.0911, "step": 2309 }, { "epoch": 1.7483443708609272, "grad_norm": 2.1124277114868164, "learning_rate": 7.588601186987277e-06, "loss": 1.1686, "step": 2310 }, { "epoch": 1.7491012298959319, "grad_norm": 2.1678647994995117, "learning_rate": 7.5808113095142334e-06, "loss": 1.1356, "step": 2311 }, { "epoch": 1.7498580889309365, "grad_norm": 2.2444238662719727, "learning_rate": 7.573022992057911e-06, "loss": 1.1302, "step": 2312 }, { "epoch": 1.7506149479659414, "grad_norm": 2.296766757965088, "learning_rate": 7.565236239638542e-06, "loss": 1.1317, "step": 2313 }, { "epoch": 1.751371807000946, "grad_norm": 2.106170177459717, "learning_rate": 7.557451057275346e-06, "loss": 1.138, "step": 2314 }, { "epoch": 1.7521286660359507, "grad_norm": 1.8964704275131226, "learning_rate": 7.549667449986533e-06, "loss": 1.1121, "step": 2315 }, { "epoch": 1.7528855250709556, "grad_norm": 2.2497787475585938, "learning_rate": 7.541885422789297e-06, "loss": 1.1607, "step": 2316 }, { "epoch": 1.7536423841059603, "grad_norm": 1.980980634689331, "learning_rate": 7.53410498069982e-06, "loss": 1.0806, "step": 2317 }, { "epoch": 1.754399243140965, "grad_norm": 2.030378818511963, "learning_rate": 7.526326128733247e-06, "loss": 1.1, "step": 2318 }, { "epoch": 1.7551561021759698, "grad_norm": 1.9196511507034302, "learning_rate": 7.5185488719037105e-06, "loss": 1.1771, "step": 2319 }, { "epoch": 1.7559129612109745, "grad_norm": 1.9450955390930176, "learning_rate": 7.510773215224318e-06, "loss": 1.1347, "step": 2320 }, { "epoch": 1.7566698202459792, "grad_norm": 1.9371559619903564, "learning_rate": 7.502999163707131e-06, "loss": 1.1026, "step": 2321 }, { "epoch": 1.757426679280984, "grad_norm": 2.021090269088745, "learning_rate": 7.4952267223631865e-06, "loss": 1.1514, "step": 2322 }, { "epoch": 1.7581835383159885, "grad_norm": 2.016483783721924, "learning_rate": 7.487455896202487e-06, "loss": 1.1315, "step": 2323 }, { "epoch": 1.7589403973509934, "grad_norm": 2.051363945007324, "learning_rate": 7.479686690233981e-06, "loss": 1.1094, "step": 2324 }, { "epoch": 1.759697256385998, "grad_norm": 2.3509905338287354, "learning_rate": 7.471919109465584e-06, "loss": 1.1104, "step": 2325 }, { "epoch": 1.7604541154210027, "grad_norm": 2.0247390270233154, "learning_rate": 7.46415315890416e-06, "loss": 1.1122, "step": 2326 }, { "epoch": 1.7612109744560076, "grad_norm": 2.1923465728759766, "learning_rate": 7.456388843555525e-06, "loss": 1.1308, "step": 2327 }, { "epoch": 1.7619678334910123, "grad_norm": 2.132502555847168, "learning_rate": 7.448626168424434e-06, "loss": 1.1637, "step": 2328 }, { "epoch": 1.762724692526017, "grad_norm": 1.9766474962234497, "learning_rate": 7.440865138514587e-06, "loss": 1.1019, "step": 2329 }, { "epoch": 1.7634815515610218, "grad_norm": 2.1354434490203857, "learning_rate": 7.433105758828631e-06, "loss": 1.0869, "step": 2330 }, { "epoch": 1.7642384105960265, "grad_norm": 2.135441303253174, "learning_rate": 7.425348034368143e-06, "loss": 1.1077, "step": 2331 }, { "epoch": 1.7649952696310311, "grad_norm": 1.9634079933166504, "learning_rate": 7.41759197013363e-06, "loss": 1.0686, "step": 2332 }, { "epoch": 1.765752128666036, "grad_norm": 2.202788829803467, "learning_rate": 7.409837571124535e-06, "loss": 1.0706, "step": 2333 }, { "epoch": 1.7665089877010407, "grad_norm": 2.3422369956970215, "learning_rate": 7.40208484233923e-06, "loss": 1.1673, "step": 2334 }, { "epoch": 1.7672658467360454, "grad_norm": 2.338772773742676, "learning_rate": 7.394333788774995e-06, "loss": 1.1037, "step": 2335 }, { "epoch": 1.7680227057710503, "grad_norm": 2.2548608779907227, "learning_rate": 7.386584415428051e-06, "loss": 1.1583, "step": 2336 }, { "epoch": 1.768779564806055, "grad_norm": 2.1475353240966797, "learning_rate": 7.3788367272935235e-06, "loss": 1.1232, "step": 2337 }, { "epoch": 1.7695364238410596, "grad_norm": 2.1223628520965576, "learning_rate": 7.37109072936545e-06, "loss": 1.1164, "step": 2338 }, { "epoch": 1.7702932828760645, "grad_norm": 2.0361294746398926, "learning_rate": 7.363346426636786e-06, "loss": 1.16, "step": 2339 }, { "epoch": 1.771050141911069, "grad_norm": 2.1143364906311035, "learning_rate": 7.355603824099389e-06, "loss": 1.142, "step": 2340 }, { "epoch": 1.7718070009460738, "grad_norm": 2.2214882373809814, "learning_rate": 7.347862926744027e-06, "loss": 1.1375, "step": 2341 }, { "epoch": 1.7725638599810787, "grad_norm": 1.9182907342910767, "learning_rate": 7.34012373956036e-06, "loss": 1.1099, "step": 2342 }, { "epoch": 1.7733207190160831, "grad_norm": 2.0895349979400635, "learning_rate": 7.332386267536949e-06, "loss": 1.1397, "step": 2343 }, { "epoch": 1.774077578051088, "grad_norm": 2.078885793685913, "learning_rate": 7.3246505156612554e-06, "loss": 1.1296, "step": 2344 }, { "epoch": 1.7748344370860927, "grad_norm": 2.180187702178955, "learning_rate": 7.3169164889196235e-06, "loss": 1.1458, "step": 2345 }, { "epoch": 1.7755912961210973, "grad_norm": 2.3137030601501465, "learning_rate": 7.309184192297289e-06, "loss": 1.0713, "step": 2346 }, { "epoch": 1.7763481551561022, "grad_norm": 2.0382871627807617, "learning_rate": 7.3014536307783725e-06, "loss": 1.1288, "step": 2347 }, { "epoch": 1.777105014191107, "grad_norm": 2.004988670349121, "learning_rate": 7.293724809345879e-06, "loss": 1.1304, "step": 2348 }, { "epoch": 1.7778618732261116, "grad_norm": 2.0899946689605713, "learning_rate": 7.285997732981683e-06, "loss": 1.1095, "step": 2349 }, { "epoch": 1.7786187322611164, "grad_norm": 2.197770118713379, "learning_rate": 7.2782724066665475e-06, "loss": 1.1219, "step": 2350 }, { "epoch": 1.779375591296121, "grad_norm": 1.9547758102416992, "learning_rate": 7.270548835380095e-06, "loss": 1.0707, "step": 2351 }, { "epoch": 1.7801324503311258, "grad_norm": 2.0842347145080566, "learning_rate": 7.262827024100821e-06, "loss": 1.1485, "step": 2352 }, { "epoch": 1.7808893093661307, "grad_norm": 2.189990520477295, "learning_rate": 7.255106977806092e-06, "loss": 1.1403, "step": 2353 }, { "epoch": 1.7816461684011353, "grad_norm": 2.299306869506836, "learning_rate": 7.247388701472129e-06, "loss": 1.1001, "step": 2354 }, { "epoch": 1.78240302743614, "grad_norm": 2.0084657669067383, "learning_rate": 7.239672200074012e-06, "loss": 1.0777, "step": 2355 }, { "epoch": 1.7831598864711449, "grad_norm": 1.9171555042266846, "learning_rate": 7.231957478585687e-06, "loss": 1.1022, "step": 2356 }, { "epoch": 1.7839167455061493, "grad_norm": 2.116420030593872, "learning_rate": 7.224244541979941e-06, "loss": 1.0945, "step": 2357 }, { "epoch": 1.7846736045411542, "grad_norm": 2.250598669052124, "learning_rate": 7.216533395228419e-06, "loss": 1.1327, "step": 2358 }, { "epoch": 1.785430463576159, "grad_norm": 2.0988495349884033, "learning_rate": 7.208824043301604e-06, "loss": 1.1452, "step": 2359 }, { "epoch": 1.7861873226111635, "grad_norm": 1.9777265787124634, "learning_rate": 7.201116491168829e-06, "loss": 1.0838, "step": 2360 }, { "epoch": 1.7869441816461684, "grad_norm": 2.1055500507354736, "learning_rate": 7.19341074379827e-06, "loss": 1.0996, "step": 2361 }, { "epoch": 1.787701040681173, "grad_norm": 1.8813843727111816, "learning_rate": 7.185706806156921e-06, "loss": 1.1238, "step": 2362 }, { "epoch": 1.7884578997161777, "grad_norm": 1.9652965068817139, "learning_rate": 7.178004683210634e-06, "loss": 1.1141, "step": 2363 }, { "epoch": 1.7892147587511826, "grad_norm": 2.2869348526000977, "learning_rate": 7.170304379924078e-06, "loss": 1.1882, "step": 2364 }, { "epoch": 1.7899716177861873, "grad_norm": 2.1046929359436035, "learning_rate": 7.162605901260749e-06, "loss": 1.0947, "step": 2365 }, { "epoch": 1.790728476821192, "grad_norm": 2.0936052799224854, "learning_rate": 7.1549092521829676e-06, "loss": 1.1371, "step": 2366 }, { "epoch": 1.7914853358561968, "grad_norm": 2.0121428966522217, "learning_rate": 7.147214437651881e-06, "loss": 1.102, "step": 2367 }, { "epoch": 1.7922421948912015, "grad_norm": 2.144970178604126, "learning_rate": 7.139521462627446e-06, "loss": 1.1266, "step": 2368 }, { "epoch": 1.7929990539262062, "grad_norm": 2.3722221851348877, "learning_rate": 7.1318303320684356e-06, "loss": 1.1499, "step": 2369 }, { "epoch": 1.793755912961211, "grad_norm": 2.255847454071045, "learning_rate": 7.124141050932441e-06, "loss": 1.1243, "step": 2370 }, { "epoch": 1.7945127719962157, "grad_norm": 2.1879565715789795, "learning_rate": 7.116453624175847e-06, "loss": 1.0995, "step": 2371 }, { "epoch": 1.7952696310312204, "grad_norm": 2.267245292663574, "learning_rate": 7.108768056753863e-06, "loss": 1.156, "step": 2372 }, { "epoch": 1.7960264900662253, "grad_norm": 2.1807005405426025, "learning_rate": 7.101084353620476e-06, "loss": 1.1588, "step": 2373 }, { "epoch": 1.79678334910123, "grad_norm": 2.2159693241119385, "learning_rate": 7.0934025197284924e-06, "loss": 1.0647, "step": 2374 }, { "epoch": 1.7975402081362346, "grad_norm": 2.1058151721954346, "learning_rate": 7.085722560029507e-06, "loss": 1.1166, "step": 2375 }, { "epoch": 1.7982970671712395, "grad_norm": 2.202956438064575, "learning_rate": 7.0780444794738945e-06, "loss": 1.1524, "step": 2376 }, { "epoch": 1.799053926206244, "grad_norm": 2.15413761138916, "learning_rate": 7.070368283010836e-06, "loss": 1.1331, "step": 2377 }, { "epoch": 1.7998107852412488, "grad_norm": 1.968179702758789, "learning_rate": 7.062693975588291e-06, "loss": 1.0785, "step": 2378 }, { "epoch": 1.8005676442762537, "grad_norm": 2.2872471809387207, "learning_rate": 7.0550215621529965e-06, "loss": 1.1364, "step": 2379 }, { "epoch": 1.8013245033112582, "grad_norm": 2.0598936080932617, "learning_rate": 7.047351047650476e-06, "loss": 1.1238, "step": 2380 }, { "epoch": 1.802081362346263, "grad_norm": 2.055774688720703, "learning_rate": 7.039682437025028e-06, "loss": 1.1336, "step": 2381 }, { "epoch": 1.8028382213812677, "grad_norm": 2.1142072677612305, "learning_rate": 7.032015735219719e-06, "loss": 1.1216, "step": 2382 }, { "epoch": 1.8035950804162724, "grad_norm": 2.067873001098633, "learning_rate": 7.024350947176391e-06, "loss": 1.1253, "step": 2383 }, { "epoch": 1.8043519394512773, "grad_norm": 1.906582236289978, "learning_rate": 7.016688077835645e-06, "loss": 1.1002, "step": 2384 }, { "epoch": 1.805108798486282, "grad_norm": 2.005889892578125, "learning_rate": 7.009027132136853e-06, "loss": 1.135, "step": 2385 }, { "epoch": 1.8058656575212866, "grad_norm": 1.9194884300231934, "learning_rate": 7.001368115018144e-06, "loss": 1.0872, "step": 2386 }, { "epoch": 1.8066225165562915, "grad_norm": 2.044262647628784, "learning_rate": 6.993711031416402e-06, "loss": 1.0973, "step": 2387 }, { "epoch": 1.8073793755912961, "grad_norm": 2.0718541145324707, "learning_rate": 6.986055886267265e-06, "loss": 1.1224, "step": 2388 }, { "epoch": 1.8081362346263008, "grad_norm": 2.132376194000244, "learning_rate": 6.97840268450513e-06, "loss": 1.083, "step": 2389 }, { "epoch": 1.8088930936613057, "grad_norm": 2.1769330501556396, "learning_rate": 6.970751431063124e-06, "loss": 1.148, "step": 2390 }, { "epoch": 1.8096499526963103, "grad_norm": 2.1449358463287354, "learning_rate": 6.963102130873134e-06, "loss": 1.0967, "step": 2391 }, { "epoch": 1.810406811731315, "grad_norm": 1.9859085083007812, "learning_rate": 6.955454788865785e-06, "loss": 1.1101, "step": 2392 }, { "epoch": 1.81116367076632, "grad_norm": 2.211151361465454, "learning_rate": 6.947809409970431e-06, "loss": 1.114, "step": 2393 }, { "epoch": 1.8119205298013243, "grad_norm": 2.0564661026000977, "learning_rate": 6.940165999115169e-06, "loss": 1.0703, "step": 2394 }, { "epoch": 1.8126773888363292, "grad_norm": 2.2020647525787354, "learning_rate": 6.932524561226824e-06, "loss": 1.0784, "step": 2395 }, { "epoch": 1.8134342478713341, "grad_norm": 2.0232954025268555, "learning_rate": 6.924885101230955e-06, "loss": 1.1231, "step": 2396 }, { "epoch": 1.8141911069063386, "grad_norm": 2.0655837059020996, "learning_rate": 6.917247624051836e-06, "loss": 1.102, "step": 2397 }, { "epoch": 1.8149479659413434, "grad_norm": 2.0320346355438232, "learning_rate": 6.90961213461247e-06, "loss": 1.119, "step": 2398 }, { "epoch": 1.815704824976348, "grad_norm": 2.415329694747925, "learning_rate": 6.901978637834579e-06, "loss": 1.1015, "step": 2399 }, { "epoch": 1.8164616840113528, "grad_norm": 1.962516188621521, "learning_rate": 6.894347138638595e-06, "loss": 1.1063, "step": 2400 }, { "epoch": 1.8172185430463577, "grad_norm": 2.263796329498291, "learning_rate": 6.886717641943668e-06, "loss": 1.0946, "step": 2401 }, { "epoch": 1.8179754020813623, "grad_norm": 1.8907090425491333, "learning_rate": 6.879090152667655e-06, "loss": 1.0842, "step": 2402 }, { "epoch": 1.818732261116367, "grad_norm": 2.3313028812408447, "learning_rate": 6.871464675727122e-06, "loss": 1.0972, "step": 2403 }, { "epoch": 1.8194891201513719, "grad_norm": 2.123699903488159, "learning_rate": 6.8638412160373294e-06, "loss": 1.0953, "step": 2404 }, { "epoch": 1.8202459791863765, "grad_norm": 2.1058695316314697, "learning_rate": 6.856219778512248e-06, "loss": 1.0983, "step": 2405 }, { "epoch": 1.8210028382213812, "grad_norm": 2.0354390144348145, "learning_rate": 6.8486003680645384e-06, "loss": 1.1184, "step": 2406 }, { "epoch": 1.821759697256386, "grad_norm": 2.448774814605713, "learning_rate": 6.840982989605554e-06, "loss": 1.1902, "step": 2407 }, { "epoch": 1.8225165562913908, "grad_norm": 2.067413330078125, "learning_rate": 6.833367648045347e-06, "loss": 1.0844, "step": 2408 }, { "epoch": 1.8232734153263954, "grad_norm": 1.8351151943206787, "learning_rate": 6.825754348292647e-06, "loss": 1.0751, "step": 2409 }, { "epoch": 1.8240302743614003, "grad_norm": 2.036219835281372, "learning_rate": 6.8181430952548664e-06, "loss": 1.1118, "step": 2410 }, { "epoch": 1.824787133396405, "grad_norm": 2.2903716564178467, "learning_rate": 6.810533893838111e-06, "loss": 1.1085, "step": 2411 }, { "epoch": 1.8255439924314096, "grad_norm": 2.1487245559692383, "learning_rate": 6.802926748947149e-06, "loss": 1.0766, "step": 2412 }, { "epoch": 1.8263008514664145, "grad_norm": 2.073429822921753, "learning_rate": 6.795321665485434e-06, "loss": 1.1056, "step": 2413 }, { "epoch": 1.827057710501419, "grad_norm": 2.1071133613586426, "learning_rate": 6.7877186483550865e-06, "loss": 1.1688, "step": 2414 }, { "epoch": 1.8278145695364238, "grad_norm": 2.153792381286621, "learning_rate": 6.780117702456892e-06, "loss": 1.1281, "step": 2415 }, { "epoch": 1.8285714285714287, "grad_norm": 2.046393632888794, "learning_rate": 6.772518832690312e-06, "loss": 1.1413, "step": 2416 }, { "epoch": 1.8293282876064332, "grad_norm": 2.0445821285247803, "learning_rate": 6.764922043953452e-06, "loss": 1.0761, "step": 2417 }, { "epoch": 1.830085146641438, "grad_norm": 2.1296608448028564, "learning_rate": 6.757327341143093e-06, "loss": 1.1077, "step": 2418 }, { "epoch": 1.8308420056764427, "grad_norm": 2.218290328979492, "learning_rate": 6.749734729154663e-06, "loss": 1.0869, "step": 2419 }, { "epoch": 1.8315988647114474, "grad_norm": 2.161032199859619, "learning_rate": 6.742144212882244e-06, "loss": 1.1116, "step": 2420 }, { "epoch": 1.8323557237464523, "grad_norm": 1.979115605354309, "learning_rate": 6.734555797218567e-06, "loss": 1.1321, "step": 2421 }, { "epoch": 1.833112582781457, "grad_norm": 2.016322135925293, "learning_rate": 6.726969487055008e-06, "loss": 1.1265, "step": 2422 }, { "epoch": 1.8338694418164616, "grad_norm": 1.943589448928833, "learning_rate": 6.719385287281589e-06, "loss": 1.079, "step": 2423 }, { "epoch": 1.8346263008514665, "grad_norm": 2.0779478549957275, "learning_rate": 6.711803202786965e-06, "loss": 1.161, "step": 2424 }, { "epoch": 1.8353831598864712, "grad_norm": 1.9313197135925293, "learning_rate": 6.7042232384584396e-06, "loss": 1.114, "step": 2425 }, { "epoch": 1.8361400189214758, "grad_norm": 2.177368640899658, "learning_rate": 6.6966453991819355e-06, "loss": 1.1141, "step": 2426 }, { "epoch": 1.8368968779564807, "grad_norm": 2.155545234680176, "learning_rate": 6.689069689842015e-06, "loss": 1.1058, "step": 2427 }, { "epoch": 1.8376537369914854, "grad_norm": 2.1615564823150635, "learning_rate": 6.681496115321863e-06, "loss": 1.1445, "step": 2428 }, { "epoch": 1.83841059602649, "grad_norm": 2.2454423904418945, "learning_rate": 6.6739246805032895e-06, "loss": 1.1575, "step": 2429 }, { "epoch": 1.839167455061495, "grad_norm": 1.9341751337051392, "learning_rate": 6.6663553902667345e-06, "loss": 1.0993, "step": 2430 }, { "epoch": 1.8399243140964994, "grad_norm": 2.307654857635498, "learning_rate": 6.658788249491236e-06, "loss": 1.0903, "step": 2431 }, { "epoch": 1.8406811731315043, "grad_norm": 2.172126293182373, "learning_rate": 6.651223263054462e-06, "loss": 1.1384, "step": 2432 }, { "epoch": 1.8414380321665091, "grad_norm": 2.061699151992798, "learning_rate": 6.64366043583269e-06, "loss": 1.1066, "step": 2433 }, { "epoch": 1.8421948912015136, "grad_norm": 2.0565085411071777, "learning_rate": 6.636099772700797e-06, "loss": 1.1265, "step": 2434 }, { "epoch": 1.8429517502365185, "grad_norm": 2.3713178634643555, "learning_rate": 6.628541278532276e-06, "loss": 1.1067, "step": 2435 }, { "epoch": 1.8437086092715231, "grad_norm": 2.0300235748291016, "learning_rate": 6.620984958199217e-06, "loss": 1.1053, "step": 2436 }, { "epoch": 1.8444654683065278, "grad_norm": 1.8853594064712524, "learning_rate": 6.613430816572308e-06, "loss": 1.1375, "step": 2437 }, { "epoch": 1.8452223273415327, "grad_norm": 2.140911102294922, "learning_rate": 6.605878858520832e-06, "loss": 1.1372, "step": 2438 }, { "epoch": 1.8459791863765374, "grad_norm": 2.0533270835876465, "learning_rate": 6.598329088912666e-06, "loss": 1.1054, "step": 2439 }, { "epoch": 1.846736045411542, "grad_norm": 2.0813000202178955, "learning_rate": 6.59078151261428e-06, "loss": 1.0635, "step": 2440 }, { "epoch": 1.847492904446547, "grad_norm": 2.0938546657562256, "learning_rate": 6.5832361344907225e-06, "loss": 1.1368, "step": 2441 }, { "epoch": 1.8482497634815516, "grad_norm": 2.1274354457855225, "learning_rate": 6.57569295940563e-06, "loss": 1.1446, "step": 2442 }, { "epoch": 1.8490066225165562, "grad_norm": 2.2737364768981934, "learning_rate": 6.5681519922212175e-06, "loss": 1.1007, "step": 2443 }, { "epoch": 1.8497634815515611, "grad_norm": 2.0562212467193604, "learning_rate": 6.560613237798282e-06, "loss": 1.1033, "step": 2444 }, { "epoch": 1.8505203405865658, "grad_norm": 2.1894006729125977, "learning_rate": 6.553076700996186e-06, "loss": 1.1733, "step": 2445 }, { "epoch": 1.8512771996215704, "grad_norm": 2.1526927947998047, "learning_rate": 6.545542386672864e-06, "loss": 1.1254, "step": 2446 }, { "epoch": 1.8520340586565753, "grad_norm": 2.335092306137085, "learning_rate": 6.538010299684827e-06, "loss": 1.089, "step": 2447 }, { "epoch": 1.85279091769158, "grad_norm": 2.1147849559783936, "learning_rate": 6.530480444887135e-06, "loss": 1.1075, "step": 2448 }, { "epoch": 1.8535477767265847, "grad_norm": 1.9990819692611694, "learning_rate": 6.522952827133424e-06, "loss": 1.1069, "step": 2449 }, { "epoch": 1.8543046357615895, "grad_norm": 2.2554056644439697, "learning_rate": 6.515427451275879e-06, "loss": 1.1205, "step": 2450 }, { "epoch": 1.855061494796594, "grad_norm": 2.143373489379883, "learning_rate": 6.507904322165242e-06, "loss": 1.1, "step": 2451 }, { "epoch": 1.8558183538315989, "grad_norm": 2.145324468612671, "learning_rate": 6.500383444650808e-06, "loss": 1.124, "step": 2452 }, { "epoch": 1.8565752128666035, "grad_norm": 2.0681822299957275, "learning_rate": 6.492864823580418e-06, "loss": 1.1404, "step": 2453 }, { "epoch": 1.8573320719016082, "grad_norm": 2.0816290378570557, "learning_rate": 6.485348463800467e-06, "loss": 1.121, "step": 2454 }, { "epoch": 1.858088930936613, "grad_norm": 2.210402488708496, "learning_rate": 6.477834370155879e-06, "loss": 1.081, "step": 2455 }, { "epoch": 1.8588457899716178, "grad_norm": 2.258357286453247, "learning_rate": 6.4703225474901266e-06, "loss": 1.1221, "step": 2456 }, { "epoch": 1.8596026490066224, "grad_norm": 2.336432456970215, "learning_rate": 6.462813000645216e-06, "loss": 1.1288, "step": 2457 }, { "epoch": 1.8603595080416273, "grad_norm": 2.224451780319214, "learning_rate": 6.4553057344616885e-06, "loss": 1.1213, "step": 2458 }, { "epoch": 1.861116367076632, "grad_norm": 2.012571096420288, "learning_rate": 6.447800753778608e-06, "loss": 1.1079, "step": 2459 }, { "epoch": 1.8618732261116366, "grad_norm": 2.0077013969421387, "learning_rate": 6.440298063433578e-06, "loss": 1.1139, "step": 2460 }, { "epoch": 1.8626300851466415, "grad_norm": 2.0572779178619385, "learning_rate": 6.432797668262713e-06, "loss": 1.1225, "step": 2461 }, { "epoch": 1.8633869441816462, "grad_norm": 2.052415609359741, "learning_rate": 6.425299573100653e-06, "loss": 1.1232, "step": 2462 }, { "epoch": 1.8641438032166509, "grad_norm": 2.1070804595947266, "learning_rate": 6.41780378278056e-06, "loss": 1.1425, "step": 2463 }, { "epoch": 1.8649006622516557, "grad_norm": 2.1018309593200684, "learning_rate": 6.410310302134102e-06, "loss": 1.124, "step": 2464 }, { "epoch": 1.8656575212866604, "grad_norm": 2.104137897491455, "learning_rate": 6.4028191359914635e-06, "loss": 1.1366, "step": 2465 }, { "epoch": 1.866414380321665, "grad_norm": 2.196840763092041, "learning_rate": 6.395330289181339e-06, "loss": 1.1138, "step": 2466 }, { "epoch": 1.86717123935667, "grad_norm": 2.0204899311065674, "learning_rate": 6.38784376653092e-06, "loss": 1.1571, "step": 2467 }, { "epoch": 1.8679280983916744, "grad_norm": 2.1718480587005615, "learning_rate": 6.380359572865909e-06, "loss": 1.1265, "step": 2468 }, { "epoch": 1.8686849574266793, "grad_norm": 2.2680718898773193, "learning_rate": 6.372877713010501e-06, "loss": 1.1218, "step": 2469 }, { "epoch": 1.8694418164616842, "grad_norm": 1.9217084646224976, "learning_rate": 6.365398191787388e-06, "loss": 1.0846, "step": 2470 }, { "epoch": 1.8701986754966886, "grad_norm": 2.0585711002349854, "learning_rate": 6.35792101401776e-06, "loss": 1.1274, "step": 2471 }, { "epoch": 1.8709555345316935, "grad_norm": 1.989283800125122, "learning_rate": 6.350446184521285e-06, "loss": 1.095, "step": 2472 }, { "epoch": 1.8717123935666982, "grad_norm": 1.886738657951355, "learning_rate": 6.3429737081161265e-06, "loss": 1.1196, "step": 2473 }, { "epoch": 1.8724692526017028, "grad_norm": 1.9688234329223633, "learning_rate": 6.335503589618933e-06, "loss": 1.143, "step": 2474 }, { "epoch": 1.8732261116367077, "grad_norm": 2.37060284614563, "learning_rate": 6.328035833844823e-06, "loss": 1.1088, "step": 2475 }, { "epoch": 1.8739829706717124, "grad_norm": 2.3199589252471924, "learning_rate": 6.320570445607399e-06, "loss": 1.1072, "step": 2476 }, { "epoch": 1.874739829706717, "grad_norm": 1.914215087890625, "learning_rate": 6.313107429718741e-06, "loss": 1.1222, "step": 2477 }, { "epoch": 1.875496688741722, "grad_norm": 2.3843131065368652, "learning_rate": 6.305646790989391e-06, "loss": 1.1509, "step": 2478 }, { "epoch": 1.8762535477767266, "grad_norm": 2.1501553058624268, "learning_rate": 6.298188534228365e-06, "loss": 1.0925, "step": 2479 }, { "epoch": 1.8770104068117313, "grad_norm": 2.103590965270996, "learning_rate": 6.290732664243141e-06, "loss": 1.1068, "step": 2480 }, { "epoch": 1.8777672658467361, "grad_norm": 2.1373817920684814, "learning_rate": 6.283279185839658e-06, "loss": 1.1228, "step": 2481 }, { "epoch": 1.8785241248817408, "grad_norm": 1.9448984861373901, "learning_rate": 6.275828103822317e-06, "loss": 1.1138, "step": 2482 }, { "epoch": 1.8792809839167455, "grad_norm": 2.133575916290283, "learning_rate": 6.268379422993969e-06, "loss": 1.137, "step": 2483 }, { "epoch": 1.8800378429517504, "grad_norm": 2.0521798133850098, "learning_rate": 6.26093314815592e-06, "loss": 1.1077, "step": 2484 }, { "epoch": 1.8807947019867548, "grad_norm": 2.154632091522217, "learning_rate": 6.253489284107929e-06, "loss": 1.0963, "step": 2485 }, { "epoch": 1.8815515610217597, "grad_norm": 2.0606281757354736, "learning_rate": 6.246047835648191e-06, "loss": 1.1233, "step": 2486 }, { "epoch": 1.8823084200567646, "grad_norm": 1.9377020597457886, "learning_rate": 6.238608807573355e-06, "loss": 1.128, "step": 2487 }, { "epoch": 1.883065279091769, "grad_norm": 2.133552074432373, "learning_rate": 6.231172204678507e-06, "loss": 1.0872, "step": 2488 }, { "epoch": 1.883822138126774, "grad_norm": 2.1637847423553467, "learning_rate": 6.2237380317571626e-06, "loss": 1.1051, "step": 2489 }, { "epoch": 1.8845789971617786, "grad_norm": 2.0870983600616455, "learning_rate": 6.216306293601277e-06, "loss": 1.1296, "step": 2490 }, { "epoch": 1.8853358561967832, "grad_norm": 2.129365921020508, "learning_rate": 6.20887699500124e-06, "loss": 1.1125, "step": 2491 }, { "epoch": 1.8860927152317881, "grad_norm": 1.887802004814148, "learning_rate": 6.20145014074586e-06, "loss": 1.0531, "step": 2492 }, { "epoch": 1.8868495742667928, "grad_norm": 2.163595199584961, "learning_rate": 6.194025735622371e-06, "loss": 1.0727, "step": 2493 }, { "epoch": 1.8876064333017974, "grad_norm": 1.8616597652435303, "learning_rate": 6.186603784416441e-06, "loss": 1.0561, "step": 2494 }, { "epoch": 1.8883632923368023, "grad_norm": 2.1504313945770264, "learning_rate": 6.179184291912138e-06, "loss": 1.1093, "step": 2495 }, { "epoch": 1.889120151371807, "grad_norm": 1.997025728225708, "learning_rate": 6.171767262891958e-06, "loss": 1.0991, "step": 2496 }, { "epoch": 1.8898770104068117, "grad_norm": 2.141857147216797, "learning_rate": 6.164352702136799e-06, "loss": 1.0897, "step": 2497 }, { "epoch": 1.8906338694418166, "grad_norm": 1.9679754972457886, "learning_rate": 6.15694061442598e-06, "loss": 1.0971, "step": 2498 }, { "epoch": 1.8913907284768212, "grad_norm": 2.048257350921631, "learning_rate": 6.149531004537222e-06, "loss": 1.1139, "step": 2499 }, { "epoch": 1.8921475875118259, "grad_norm": 2.4383885860443115, "learning_rate": 6.1421238772466375e-06, "loss": 1.1028, "step": 2500 }, { "epoch": 1.8929044465468308, "grad_norm": 2.0352723598480225, "learning_rate": 6.134719237328751e-06, "loss": 1.0957, "step": 2501 }, { "epoch": 1.8936613055818354, "grad_norm": 2.1713624000549316, "learning_rate": 6.127317089556489e-06, "loss": 1.0726, "step": 2502 }, { "epoch": 1.89441816461684, "grad_norm": 2.2224864959716797, "learning_rate": 6.119917438701151e-06, "loss": 1.0919, "step": 2503 }, { "epoch": 1.895175023651845, "grad_norm": 2.1596179008483887, "learning_rate": 6.112520289532445e-06, "loss": 1.1273, "step": 2504 }, { "epoch": 1.8959318826868494, "grad_norm": 2.018328905105591, "learning_rate": 6.105125646818463e-06, "loss": 1.1354, "step": 2505 }, { "epoch": 1.8966887417218543, "grad_norm": 2.1755290031433105, "learning_rate": 6.097733515325671e-06, "loss": 1.1037, "step": 2506 }, { "epoch": 1.8974456007568592, "grad_norm": 2.172973871231079, "learning_rate": 6.090343899818931e-06, "loss": 1.0656, "step": 2507 }, { "epoch": 1.8982024597918636, "grad_norm": 2.193934679031372, "learning_rate": 6.0829568050614725e-06, "loss": 1.1252, "step": 2508 }, { "epoch": 1.8989593188268685, "grad_norm": 2.3651788234710693, "learning_rate": 6.075572235814909e-06, "loss": 1.1242, "step": 2509 }, { "epoch": 1.8997161778618732, "grad_norm": 2.107897996902466, "learning_rate": 6.0681901968392184e-06, "loss": 1.0937, "step": 2510 }, { "epoch": 1.9004730368968779, "grad_norm": 2.57551908493042, "learning_rate": 6.060810692892748e-06, "loss": 1.129, "step": 2511 }, { "epoch": 1.9012298959318827, "grad_norm": 2.0832760334014893, "learning_rate": 6.053433728732217e-06, "loss": 1.138, "step": 2512 }, { "epoch": 1.9019867549668874, "grad_norm": 1.9609954357147217, "learning_rate": 6.046059309112703e-06, "loss": 1.1404, "step": 2513 }, { "epoch": 1.902743614001892, "grad_norm": 2.1895411014556885, "learning_rate": 6.038687438787642e-06, "loss": 1.1378, "step": 2514 }, { "epoch": 1.903500473036897, "grad_norm": 2.058955430984497, "learning_rate": 6.031318122508833e-06, "loss": 1.117, "step": 2515 }, { "epoch": 1.9042573320719016, "grad_norm": 2.1496293544769287, "learning_rate": 6.023951365026426e-06, "loss": 1.1115, "step": 2516 }, { "epoch": 1.9050141911069063, "grad_norm": 2.147587776184082, "learning_rate": 6.016587171088913e-06, "loss": 1.1419, "step": 2517 }, { "epoch": 1.9057710501419112, "grad_norm": 2.470024824142456, "learning_rate": 6.009225545443148e-06, "loss": 1.1518, "step": 2518 }, { "epoch": 1.9065279091769158, "grad_norm": 2.0301973819732666, "learning_rate": 6.001866492834322e-06, "loss": 1.0815, "step": 2519 }, { "epoch": 1.9072847682119205, "grad_norm": 2.2255492210388184, "learning_rate": 5.994510018005964e-06, "loss": 1.1555, "step": 2520 }, { "epoch": 1.9080416272469254, "grad_norm": 2.101928472518921, "learning_rate": 5.987156125699951e-06, "loss": 1.1251, "step": 2521 }, { "epoch": 1.9087984862819298, "grad_norm": 2.0817983150482178, "learning_rate": 5.979804820656483e-06, "loss": 1.1233, "step": 2522 }, { "epoch": 1.9095553453169347, "grad_norm": 2.082615375518799, "learning_rate": 5.972456107614105e-06, "loss": 1.1198, "step": 2523 }, { "epoch": 1.9103122043519396, "grad_norm": 2.036180257797241, "learning_rate": 5.965109991309686e-06, "loss": 1.1056, "step": 2524 }, { "epoch": 1.911069063386944, "grad_norm": 2.358384847640991, "learning_rate": 5.9577664764784126e-06, "loss": 1.1125, "step": 2525 }, { "epoch": 1.911825922421949, "grad_norm": 2.097381830215454, "learning_rate": 5.950425567853813e-06, "loss": 1.1394, "step": 2526 }, { "epoch": 1.9125827814569536, "grad_norm": 2.0445775985717773, "learning_rate": 5.943087270167718e-06, "loss": 1.1276, "step": 2527 }, { "epoch": 1.9133396404919583, "grad_norm": 2.2490360736846924, "learning_rate": 5.935751588150282e-06, "loss": 1.0963, "step": 2528 }, { "epoch": 1.9140964995269631, "grad_norm": 2.212881088256836, "learning_rate": 5.928418526529981e-06, "loss": 1.0829, "step": 2529 }, { "epoch": 1.9148533585619678, "grad_norm": 1.9197094440460205, "learning_rate": 5.921088090033585e-06, "loss": 1.0947, "step": 2530 }, { "epoch": 1.9156102175969725, "grad_norm": 2.0829176902770996, "learning_rate": 5.913760283386186e-06, "loss": 1.1466, "step": 2531 }, { "epoch": 1.9163670766319774, "grad_norm": 2.326220750808716, "learning_rate": 5.906435111311179e-06, "loss": 1.131, "step": 2532 }, { "epoch": 1.917123935666982, "grad_norm": 2.2894301414489746, "learning_rate": 5.899112578530255e-06, "loss": 1.1062, "step": 2533 }, { "epoch": 1.9178807947019867, "grad_norm": 2.134059190750122, "learning_rate": 5.891792689763407e-06, "loss": 1.1116, "step": 2534 }, { "epoch": 1.9186376537369916, "grad_norm": 2.1360747814178467, "learning_rate": 5.884475449728925e-06, "loss": 1.15, "step": 2535 }, { "epoch": 1.9193945127719962, "grad_norm": 2.3759396076202393, "learning_rate": 5.877160863143391e-06, "loss": 1.0696, "step": 2536 }, { "epoch": 1.920151371807001, "grad_norm": 2.216271162033081, "learning_rate": 5.869848934721671e-06, "loss": 1.166, "step": 2537 }, { "epoch": 1.9209082308420058, "grad_norm": 2.0322463512420654, "learning_rate": 5.86253966917693e-06, "loss": 1.1031, "step": 2538 }, { "epoch": 1.9216650898770105, "grad_norm": 1.9586721658706665, "learning_rate": 5.855233071220603e-06, "loss": 1.1062, "step": 2539 }, { "epoch": 1.9224219489120151, "grad_norm": 2.202064037322998, "learning_rate": 5.8479291455624186e-06, "loss": 1.1295, "step": 2540 }, { "epoch": 1.92317880794702, "grad_norm": 2.291038751602173, "learning_rate": 5.840627896910365e-06, "loss": 1.157, "step": 2541 }, { "epoch": 1.9239356669820245, "grad_norm": 1.989047884941101, "learning_rate": 5.833329329970726e-06, "loss": 1.1506, "step": 2542 }, { "epoch": 1.9246925260170293, "grad_norm": 1.9984663724899292, "learning_rate": 5.82603344944804e-06, "loss": 1.106, "step": 2543 }, { "epoch": 1.9254493850520342, "grad_norm": 2.3392581939697266, "learning_rate": 5.818740260045123e-06, "loss": 1.1819, "step": 2544 }, { "epoch": 1.9262062440870387, "grad_norm": 2.148768663406372, "learning_rate": 5.811449766463058e-06, "loss": 1.1439, "step": 2545 }, { "epoch": 1.9269631031220436, "grad_norm": 2.033663511276245, "learning_rate": 5.804161973401175e-06, "loss": 1.1111, "step": 2546 }, { "epoch": 1.9277199621570482, "grad_norm": 2.2173452377319336, "learning_rate": 5.796876885557084e-06, "loss": 1.0752, "step": 2547 }, { "epoch": 1.9284768211920529, "grad_norm": 2.039340019226074, "learning_rate": 5.78959450762664e-06, "loss": 1.0878, "step": 2548 }, { "epoch": 1.9292336802270578, "grad_norm": 2.22098445892334, "learning_rate": 5.782314844303949e-06, "loss": 1.1109, "step": 2549 }, { "epoch": 1.9299905392620624, "grad_norm": 1.9632805585861206, "learning_rate": 5.775037900281372e-06, "loss": 1.0981, "step": 2550 }, { "epoch": 1.930747398297067, "grad_norm": 2.195981025695801, "learning_rate": 5.767763680249521e-06, "loss": 1.0659, "step": 2551 }, { "epoch": 1.931504257332072, "grad_norm": 2.3889381885528564, "learning_rate": 5.760492188897241e-06, "loss": 1.1027, "step": 2552 }, { "epoch": 1.9322611163670766, "grad_norm": 2.212132692337036, "learning_rate": 5.753223430911625e-06, "loss": 1.1435, "step": 2553 }, { "epoch": 1.9330179754020813, "grad_norm": 2.109678268432617, "learning_rate": 5.7459574109780105e-06, "loss": 1.1226, "step": 2554 }, { "epoch": 1.9337748344370862, "grad_norm": 2.1192758083343506, "learning_rate": 5.738694133779954e-06, "loss": 1.158, "step": 2555 }, { "epoch": 1.9345316934720909, "grad_norm": 2.249246597290039, "learning_rate": 5.7314336039992565e-06, "loss": 1.1342, "step": 2556 }, { "epoch": 1.9352885525070955, "grad_norm": 2.1159563064575195, "learning_rate": 5.7241758263159504e-06, "loss": 1.0984, "step": 2557 }, { "epoch": 1.9360454115421004, "grad_norm": 2.092686891555786, "learning_rate": 5.7169208054082794e-06, "loss": 1.1462, "step": 2558 }, { "epoch": 1.9368022705771049, "grad_norm": 2.201853036880493, "learning_rate": 5.7096685459527235e-06, "loss": 1.1121, "step": 2559 }, { "epoch": 1.9375591296121097, "grad_norm": 2.29725980758667, "learning_rate": 5.702419052623982e-06, "loss": 1.17, "step": 2560 }, { "epoch": 1.9383159886471146, "grad_norm": 2.636181592941284, "learning_rate": 5.695172330094961e-06, "loss": 1.1398, "step": 2561 }, { "epoch": 1.939072847682119, "grad_norm": 2.0273807048797607, "learning_rate": 5.687928383036795e-06, "loss": 1.0939, "step": 2562 }, { "epoch": 1.939829706717124, "grad_norm": 2.2744505405426025, "learning_rate": 5.680687216118814e-06, "loss": 1.1233, "step": 2563 }, { "epoch": 1.9405865657521286, "grad_norm": 1.976132869720459, "learning_rate": 5.6734488340085665e-06, "loss": 1.1573, "step": 2564 }, { "epoch": 1.9413434247871333, "grad_norm": 5.837771415710449, "learning_rate": 5.666213241371809e-06, "loss": 1.1299, "step": 2565 }, { "epoch": 1.9421002838221382, "grad_norm": 2.1729319095611572, "learning_rate": 5.658980442872484e-06, "loss": 1.1399, "step": 2566 }, { "epoch": 1.9428571428571428, "grad_norm": 2.049954652786255, "learning_rate": 5.651750443172749e-06, "loss": 1.1012, "step": 2567 }, { "epoch": 1.9436140018921475, "grad_norm": 2.170069694519043, "learning_rate": 5.644523246932951e-06, "loss": 1.1276, "step": 2568 }, { "epoch": 1.9443708609271524, "grad_norm": 2.143918514251709, "learning_rate": 5.637298858811633e-06, "loss": 1.1127, "step": 2569 }, { "epoch": 1.945127719962157, "grad_norm": 2.114530324935913, "learning_rate": 5.6300772834655195e-06, "loss": 1.0949, "step": 2570 }, { "epoch": 1.9458845789971617, "grad_norm": 2.067688226699829, "learning_rate": 5.6228585255495315e-06, "loss": 1.1402, "step": 2571 }, { "epoch": 1.9466414380321666, "grad_norm": 2.254387855529785, "learning_rate": 5.615642589716773e-06, "loss": 1.197, "step": 2572 }, { "epoch": 1.9473982970671713, "grad_norm": 2.0530171394348145, "learning_rate": 5.608429480618519e-06, "loss": 1.0985, "step": 2573 }, { "epoch": 1.948155156102176, "grad_norm": 1.9690866470336914, "learning_rate": 5.6012192029042354e-06, "loss": 1.1004, "step": 2574 }, { "epoch": 1.9489120151371808, "grad_norm": 2.2864949703216553, "learning_rate": 5.594011761221554e-06, "loss": 1.1298, "step": 2575 }, { "epoch": 1.9496688741721855, "grad_norm": 2.291849374771118, "learning_rate": 5.5868071602162875e-06, "loss": 1.1472, "step": 2576 }, { "epoch": 1.9504257332071901, "grad_norm": 2.223792552947998, "learning_rate": 5.579605404532403e-06, "loss": 1.1307, "step": 2577 }, { "epoch": 1.951182592242195, "grad_norm": 2.0151219367980957, "learning_rate": 5.572406498812049e-06, "loss": 1.1281, "step": 2578 }, { "epoch": 1.9519394512771995, "grad_norm": 2.023299217224121, "learning_rate": 5.565210447695529e-06, "loss": 1.165, "step": 2579 }, { "epoch": 1.9526963103122044, "grad_norm": 1.902061939239502, "learning_rate": 5.5580172558213064e-06, "loss": 1.061, "step": 2580 }, { "epoch": 1.953453169347209, "grad_norm": 2.015148639678955, "learning_rate": 5.550826927826003e-06, "loss": 1.0866, "step": 2581 }, { "epoch": 1.9542100283822137, "grad_norm": 1.9616479873657227, "learning_rate": 5.5436394683443996e-06, "loss": 1.0903, "step": 2582 }, { "epoch": 1.9549668874172186, "grad_norm": 1.9738472700119019, "learning_rate": 5.536454882009412e-06, "loss": 1.1312, "step": 2583 }, { "epoch": 1.9557237464522232, "grad_norm": 2.2209506034851074, "learning_rate": 5.52927317345213e-06, "loss": 1.0352, "step": 2584 }, { "epoch": 1.956480605487228, "grad_norm": 2.2492170333862305, "learning_rate": 5.522094347301757e-06, "loss": 1.0982, "step": 2585 }, { "epoch": 1.9572374645222328, "grad_norm": 1.9593442678451538, "learning_rate": 5.514918408185666e-06, "loss": 1.1162, "step": 2586 }, { "epoch": 1.9579943235572375, "grad_norm": 2.0279417037963867, "learning_rate": 5.507745360729356e-06, "loss": 1.1148, "step": 2587 }, { "epoch": 1.9587511825922421, "grad_norm": 2.1683051586151123, "learning_rate": 5.500575209556462e-06, "loss": 1.1078, "step": 2588 }, { "epoch": 1.959508041627247, "grad_norm": 2.0510294437408447, "learning_rate": 5.493407959288752e-06, "loss": 1.1099, "step": 2589 }, { "epoch": 1.9602649006622517, "grad_norm": 2.144102096557617, "learning_rate": 5.486243614546135e-06, "loss": 1.0938, "step": 2590 }, { "epoch": 1.9610217596972563, "grad_norm": 1.9423801898956299, "learning_rate": 5.479082179946628e-06, "loss": 1.0941, "step": 2591 }, { "epoch": 1.9617786187322612, "grad_norm": 1.9970104694366455, "learning_rate": 5.471923660106387e-06, "loss": 1.1106, "step": 2592 }, { "epoch": 1.962535477767266, "grad_norm": 2.0680718421936035, "learning_rate": 5.46476805963969e-06, "loss": 1.1039, "step": 2593 }, { "epoch": 1.9632923368022706, "grad_norm": 2.138693332672119, "learning_rate": 5.457615383158917e-06, "loss": 1.1283, "step": 2594 }, { "epoch": 1.9640491958372754, "grad_norm": 2.0552265644073486, "learning_rate": 5.450465635274581e-06, "loss": 1.0889, "step": 2595 }, { "epoch": 1.9648060548722799, "grad_norm": 1.974301815032959, "learning_rate": 5.443318820595303e-06, "loss": 1.1174, "step": 2596 }, { "epoch": 1.9655629139072848, "grad_norm": 1.9770950078964233, "learning_rate": 5.436174943727803e-06, "loss": 1.1261, "step": 2597 }, { "epoch": 1.9663197729422897, "grad_norm": 2.0605309009552, "learning_rate": 5.42903400927692e-06, "loss": 1.1453, "step": 2598 }, { "epoch": 1.967076631977294, "grad_norm": 1.9402192831039429, "learning_rate": 5.421896021845591e-06, "loss": 1.1047, "step": 2599 }, { "epoch": 1.967833491012299, "grad_norm": 2.0428860187530518, "learning_rate": 5.4147609860348545e-06, "loss": 1.0957, "step": 2600 }, { "epoch": 1.9685903500473036, "grad_norm": 1.9550975561141968, "learning_rate": 5.407628906443844e-06, "loss": 1.1202, "step": 2601 }, { "epoch": 1.9693472090823083, "grad_norm": 1.9513860940933228, "learning_rate": 5.400499787669788e-06, "loss": 1.0562, "step": 2602 }, { "epoch": 1.9701040681173132, "grad_norm": 2.2109532356262207, "learning_rate": 5.393373634308015e-06, "loss": 1.1637, "step": 2603 }, { "epoch": 1.9708609271523179, "grad_norm": 1.9870307445526123, "learning_rate": 5.3862504509519245e-06, "loss": 1.1243, "step": 2604 }, { "epoch": 1.9716177861873225, "grad_norm": 2.027862310409546, "learning_rate": 5.379130242193018e-06, "loss": 1.0791, "step": 2605 }, { "epoch": 1.9723746452223274, "grad_norm": 1.969875454902649, "learning_rate": 5.372013012620875e-06, "loss": 1.1593, "step": 2606 }, { "epoch": 1.973131504257332, "grad_norm": 2.163132667541504, "learning_rate": 5.3648987668231475e-06, "loss": 1.1337, "step": 2607 }, { "epoch": 1.9738883632923367, "grad_norm": 2.0548205375671387, "learning_rate": 5.357787509385571e-06, "loss": 1.1185, "step": 2608 }, { "epoch": 1.9746452223273416, "grad_norm": 2.120103597640991, "learning_rate": 5.350679244891962e-06, "loss": 1.1419, "step": 2609 }, { "epoch": 1.9754020813623463, "grad_norm": 2.1263537406921387, "learning_rate": 5.343573977924188e-06, "loss": 1.1043, "step": 2610 }, { "epoch": 1.976158940397351, "grad_norm": 2.0377280712127686, "learning_rate": 5.3364717130622e-06, "loss": 1.0852, "step": 2611 }, { "epoch": 1.9769157994323558, "grad_norm": 1.9558144807815552, "learning_rate": 5.329372454884014e-06, "loss": 1.1432, "step": 2612 }, { "epoch": 1.9776726584673603, "grad_norm": 2.1129062175750732, "learning_rate": 5.322276207965698e-06, "loss": 1.1347, "step": 2613 }, { "epoch": 1.9784295175023652, "grad_norm": 2.042936086654663, "learning_rate": 5.315182976881382e-06, "loss": 1.1201, "step": 2614 }, { "epoch": 1.97918637653737, "grad_norm": 2.1350150108337402, "learning_rate": 5.308092766203265e-06, "loss": 1.0633, "step": 2615 }, { "epoch": 1.9799432355723745, "grad_norm": 1.984386682510376, "learning_rate": 5.301005580501579e-06, "loss": 1.1045, "step": 2616 }, { "epoch": 1.9807000946073794, "grad_norm": 1.9831804037094116, "learning_rate": 5.293921424344624e-06, "loss": 1.1376, "step": 2617 }, { "epoch": 1.981456953642384, "grad_norm": 2.1934800148010254, "learning_rate": 5.286840302298729e-06, "loss": 1.1043, "step": 2618 }, { "epoch": 1.9822138126773887, "grad_norm": 2.0572476387023926, "learning_rate": 5.2797622189282835e-06, "loss": 1.0807, "step": 2619 }, { "epoch": 1.9829706717123936, "grad_norm": 2.0445570945739746, "learning_rate": 5.272687178795715e-06, "loss": 1.1075, "step": 2620 }, { "epoch": 1.9837275307473983, "grad_norm": 2.0021796226501465, "learning_rate": 5.265615186461479e-06, "loss": 1.0971, "step": 2621 }, { "epoch": 1.984484389782403, "grad_norm": 2.107959508895874, "learning_rate": 5.258546246484077e-06, "loss": 1.1329, "step": 2622 }, { "epoch": 1.9852412488174078, "grad_norm": 1.8930269479751587, "learning_rate": 5.251480363420041e-06, "loss": 1.0927, "step": 2623 }, { "epoch": 1.9859981078524125, "grad_norm": 2.0325968265533447, "learning_rate": 5.244417541823935e-06, "loss": 1.1009, "step": 2624 }, { "epoch": 1.9867549668874172, "grad_norm": 2.3380300998687744, "learning_rate": 5.237357786248337e-06, "loss": 1.0821, "step": 2625 }, { "epoch": 1.987511825922422, "grad_norm": 2.121659517288208, "learning_rate": 5.230301101243864e-06, "loss": 1.0595, "step": 2626 }, { "epoch": 1.9882686849574267, "grad_norm": 2.329930067062378, "learning_rate": 5.22324749135915e-06, "loss": 1.1233, "step": 2627 }, { "epoch": 1.9890255439924314, "grad_norm": 2.044088840484619, "learning_rate": 5.216196961140837e-06, "loss": 1.1064, "step": 2628 }, { "epoch": 1.9897824030274363, "grad_norm": 2.201205015182495, "learning_rate": 5.209149515133593e-06, "loss": 1.1553, "step": 2629 }, { "epoch": 1.990539262062441, "grad_norm": 2.029348850250244, "learning_rate": 5.202105157880095e-06, "loss": 1.119, "step": 2630 }, { "epoch": 1.9912961210974456, "grad_norm": 2.332625150680542, "learning_rate": 5.1950638939210296e-06, "loss": 1.0767, "step": 2631 }, { "epoch": 1.9920529801324505, "grad_norm": 2.2954776287078857, "learning_rate": 5.188025727795084e-06, "loss": 1.0942, "step": 2632 }, { "epoch": 1.992809839167455, "grad_norm": 1.9560728073120117, "learning_rate": 5.180990664038954e-06, "loss": 1.0801, "step": 2633 }, { "epoch": 1.9935666982024598, "grad_norm": 2.1281726360321045, "learning_rate": 5.17395870718734e-06, "loss": 1.1274, "step": 2634 }, { "epoch": 1.9943235572374647, "grad_norm": 1.8601038455963135, "learning_rate": 5.166929861772925e-06, "loss": 1.0999, "step": 2635 }, { "epoch": 1.9950804162724691, "grad_norm": 2.056415319442749, "learning_rate": 5.159904132326399e-06, "loss": 1.0721, "step": 2636 }, { "epoch": 1.995837275307474, "grad_norm": 1.9574776887893677, "learning_rate": 5.152881523376445e-06, "loss": 1.1364, "step": 2637 }, { "epoch": 1.9965941343424787, "grad_norm": 2.011434555053711, "learning_rate": 5.145862039449723e-06, "loss": 1.1044, "step": 2638 }, { "epoch": 1.9973509933774833, "grad_norm": 1.9725828170776367, "learning_rate": 5.138845685070891e-06, "loss": 1.1425, "step": 2639 }, { "epoch": 1.9981078524124882, "grad_norm": 2.337836742401123, "learning_rate": 5.131832464762576e-06, "loss": 1.1062, "step": 2640 }, { "epoch": 1.998864711447493, "grad_norm": 2.0006585121154785, "learning_rate": 5.1248223830454e-06, "loss": 1.1117, "step": 2641 }, { "epoch": 1.9996215704824976, "grad_norm": 2.063220977783203, "learning_rate": 5.117815444437956e-06, "loss": 1.0999, "step": 2642 }, { "epoch": 2.0003784295175024, "grad_norm": 2.052854537963867, "learning_rate": 5.110811653456801e-06, "loss": 1.099, "step": 2643 }, { "epoch": 2.001135288552507, "grad_norm": 2.046462059020996, "learning_rate": 5.103811014616479e-06, "loss": 1.0824, "step": 2644 }, { "epoch": 2.0018921475875118, "grad_norm": 2.0290746688842773, "learning_rate": 5.096813532429496e-06, "loss": 1.0797, "step": 2645 }, { "epoch": 2.0026490066225167, "grad_norm": 2.0245625972747803, "learning_rate": 5.089819211406316e-06, "loss": 1.078, "step": 2646 }, { "epoch": 2.003405865657521, "grad_norm": 2.207991600036621, "learning_rate": 5.082828056055375e-06, "loss": 1.0669, "step": 2647 }, { "epoch": 2.004162724692526, "grad_norm": 2.133127212524414, "learning_rate": 5.075840070883069e-06, "loss": 1.0906, "step": 2648 }, { "epoch": 2.004919583727531, "grad_norm": 2.1217539310455322, "learning_rate": 5.068855260393739e-06, "loss": 1.1084, "step": 2649 }, { "epoch": 2.0056764427625353, "grad_norm": 2.1101694107055664, "learning_rate": 5.061873629089693e-06, "loss": 1.0727, "step": 2650 }, { "epoch": 2.00643330179754, "grad_norm": 2.0910747051239014, "learning_rate": 5.054895181471185e-06, "loss": 1.0533, "step": 2651 }, { "epoch": 2.007190160832545, "grad_norm": 1.8526837825775146, "learning_rate": 5.0479199220364085e-06, "loss": 1.0245, "step": 2652 }, { "epoch": 2.0079470198675495, "grad_norm": 2.0023531913757324, "learning_rate": 5.040947855281515e-06, "loss": 1.096, "step": 2653 }, { "epoch": 2.0087038789025544, "grad_norm": 1.9686319828033447, "learning_rate": 5.033978985700592e-06, "loss": 1.0526, "step": 2654 }, { "epoch": 2.0094607379375593, "grad_norm": 1.999506950378418, "learning_rate": 5.02701331778567e-06, "loss": 1.0875, "step": 2655 }, { "epoch": 2.0102175969725637, "grad_norm": 2.096315860748291, "learning_rate": 5.020050856026703e-06, "loss": 1.0637, "step": 2656 }, { "epoch": 2.0109744560075686, "grad_norm": 2.0473968982696533, "learning_rate": 5.013091604911594e-06, "loss": 1.1281, "step": 2657 }, { "epoch": 2.0117313150425735, "grad_norm": 2.0134975910186768, "learning_rate": 5.006135568926175e-06, "loss": 1.0641, "step": 2658 }, { "epoch": 2.012488174077578, "grad_norm": 1.9990431070327759, "learning_rate": 4.999182752554189e-06, "loss": 1.1031, "step": 2659 }, { "epoch": 2.013245033112583, "grad_norm": 2.199671983718872, "learning_rate": 4.992233160277321e-06, "loss": 1.1043, "step": 2660 }, { "epoch": 2.0140018921475873, "grad_norm": 2.0332555770874023, "learning_rate": 4.985286796575174e-06, "loss": 1.1057, "step": 2661 }, { "epoch": 2.014758751182592, "grad_norm": 2.1199121475219727, "learning_rate": 4.978343665925269e-06, "loss": 1.036, "step": 2662 }, { "epoch": 2.015515610217597, "grad_norm": 2.047947883605957, "learning_rate": 4.9714037728030415e-06, "loss": 1.0934, "step": 2663 }, { "epoch": 2.0162724692526015, "grad_norm": 1.814427137374878, "learning_rate": 4.964467121681834e-06, "loss": 1.0809, "step": 2664 }, { "epoch": 2.0170293282876064, "grad_norm": 2.189452648162842, "learning_rate": 4.957533717032911e-06, "loss": 1.0565, "step": 2665 }, { "epoch": 2.0177861873226113, "grad_norm": 2.025991201400757, "learning_rate": 4.95060356332544e-06, "loss": 1.0633, "step": 2666 }, { "epoch": 2.0185430463576157, "grad_norm": 2.3097431659698486, "learning_rate": 4.943676665026492e-06, "loss": 1.0527, "step": 2667 }, { "epoch": 2.0192999053926206, "grad_norm": 2.235900402069092, "learning_rate": 4.936753026601047e-06, "loss": 1.0878, "step": 2668 }, { "epoch": 2.0200567644276255, "grad_norm": 2.0237877368927, "learning_rate": 4.929832652511963e-06, "loss": 1.0243, "step": 2669 }, { "epoch": 2.02081362346263, "grad_norm": 2.148148536682129, "learning_rate": 4.922915547220014e-06, "loss": 1.05, "step": 2670 }, { "epoch": 2.021570482497635, "grad_norm": 2.204345464706421, "learning_rate": 4.91600171518386e-06, "loss": 1.0613, "step": 2671 }, { "epoch": 2.0223273415326397, "grad_norm": 2.05426287651062, "learning_rate": 4.909091160860053e-06, "loss": 1.0683, "step": 2672 }, { "epoch": 2.023084200567644, "grad_norm": 2.0507991313934326, "learning_rate": 4.902183888703029e-06, "loss": 1.1039, "step": 2673 }, { "epoch": 2.023841059602649, "grad_norm": 2.111011505126953, "learning_rate": 4.895279903165118e-06, "loss": 1.0708, "step": 2674 }, { "epoch": 2.024597918637654, "grad_norm": 2.152397871017456, "learning_rate": 4.888379208696516e-06, "loss": 1.135, "step": 2675 }, { "epoch": 2.0253547776726584, "grad_norm": 2.062863826751709, "learning_rate": 4.881481809745303e-06, "loss": 1.0808, "step": 2676 }, { "epoch": 2.0261116367076633, "grad_norm": 2.052548885345459, "learning_rate": 4.874587710757442e-06, "loss": 1.0729, "step": 2677 }, { "epoch": 2.026868495742668, "grad_norm": 2.1231849193573, "learning_rate": 4.8676969161767625e-06, "loss": 1.13, "step": 2678 }, { "epoch": 2.0276253547776726, "grad_norm": 1.9931443929672241, "learning_rate": 4.860809430444969e-06, "loss": 1.0863, "step": 2679 }, { "epoch": 2.0283822138126775, "grad_norm": 1.980806589126587, "learning_rate": 4.853925258001626e-06, "loss": 1.1208, "step": 2680 }, { "epoch": 2.029139072847682, "grad_norm": 2.0515875816345215, "learning_rate": 4.847044403284166e-06, "loss": 1.0301, "step": 2681 }, { "epoch": 2.029895931882687, "grad_norm": 2.115715742111206, "learning_rate": 4.840166870727887e-06, "loss": 1.0677, "step": 2682 }, { "epoch": 2.0306527909176917, "grad_norm": 1.9753094911575317, "learning_rate": 4.833292664765935e-06, "loss": 1.0814, "step": 2683 }, { "epoch": 2.031409649952696, "grad_norm": 2.0974655151367188, "learning_rate": 4.8264217898293226e-06, "loss": 1.0553, "step": 2684 }, { "epoch": 2.032166508987701, "grad_norm": 1.9045485258102417, "learning_rate": 4.8195542503469145e-06, "loss": 1.1018, "step": 2685 }, { "epoch": 2.032923368022706, "grad_norm": 2.1343581676483154, "learning_rate": 4.812690050745413e-06, "loss": 1.1279, "step": 2686 }, { "epoch": 2.0336802270577103, "grad_norm": 2.060368776321411, "learning_rate": 4.805829195449382e-06, "loss": 1.1021, "step": 2687 }, { "epoch": 2.0344370860927152, "grad_norm": 2.2184059619903564, "learning_rate": 4.798971688881224e-06, "loss": 1.0819, "step": 2688 }, { "epoch": 2.03519394512772, "grad_norm": 2.1077048778533936, "learning_rate": 4.792117535461187e-06, "loss": 1.0469, "step": 2689 }, { "epoch": 2.0359508041627246, "grad_norm": 2.105867624282837, "learning_rate": 4.7852667396073475e-06, "loss": 1.1129, "step": 2690 }, { "epoch": 2.0367076631977294, "grad_norm": 1.9376499652862549, "learning_rate": 4.7784193057356234e-06, "loss": 1.0889, "step": 2691 }, { "epoch": 2.0374645222327343, "grad_norm": 2.1429734230041504, "learning_rate": 4.771575238259769e-06, "loss": 1.06, "step": 2692 }, { "epoch": 2.0382213812677388, "grad_norm": 2.1066880226135254, "learning_rate": 4.764734541591365e-06, "loss": 1.0841, "step": 2693 }, { "epoch": 2.0389782403027437, "grad_norm": 2.034998655319214, "learning_rate": 4.757897220139822e-06, "loss": 1.1155, "step": 2694 }, { "epoch": 2.0397350993377485, "grad_norm": 2.0084969997406006, "learning_rate": 4.751063278312371e-06, "loss": 1.0365, "step": 2695 }, { "epoch": 2.040491958372753, "grad_norm": 2.260364055633545, "learning_rate": 4.744232720514074e-06, "loss": 1.0722, "step": 2696 }, { "epoch": 2.041248817407758, "grad_norm": 2.0515432357788086, "learning_rate": 4.737405551147791e-06, "loss": 1.1263, "step": 2697 }, { "epoch": 2.0420056764427623, "grad_norm": 2.0565128326416016, "learning_rate": 4.7305817746142186e-06, "loss": 1.0697, "step": 2698 }, { "epoch": 2.042762535477767, "grad_norm": 2.287740707397461, "learning_rate": 4.723761395311858e-06, "loss": 1.0616, "step": 2699 }, { "epoch": 2.043519394512772, "grad_norm": 2.0816521644592285, "learning_rate": 4.716944417637024e-06, "loss": 1.0425, "step": 2700 }, { "epoch": 2.0442762535477765, "grad_norm": 2.117865562438965, "learning_rate": 4.710130845983837e-06, "loss": 1.1141, "step": 2701 }, { "epoch": 2.0450331125827814, "grad_norm": 1.9112534523010254, "learning_rate": 4.703320684744216e-06, "loss": 1.06, "step": 2702 }, { "epoch": 2.0457899716177863, "grad_norm": 2.1456851959228516, "learning_rate": 4.696513938307894e-06, "loss": 1.0512, "step": 2703 }, { "epoch": 2.0465468306527907, "grad_norm": 2.6872496604919434, "learning_rate": 4.689710611062389e-06, "loss": 1.0907, "step": 2704 }, { "epoch": 2.0473036896877956, "grad_norm": 2.116586446762085, "learning_rate": 4.682910707393024e-06, "loss": 1.1179, "step": 2705 }, { "epoch": 2.0480605487228005, "grad_norm": 2.1400527954101562, "learning_rate": 4.676114231682915e-06, "loss": 1.0673, "step": 2706 }, { "epoch": 2.048817407757805, "grad_norm": 2.1281938552856445, "learning_rate": 4.669321188312969e-06, "loss": 1.0719, "step": 2707 }, { "epoch": 2.04957426679281, "grad_norm": 2.1432082653045654, "learning_rate": 4.662531581661873e-06, "loss": 1.0844, "step": 2708 }, { "epoch": 2.0503311258278147, "grad_norm": 1.9638357162475586, "learning_rate": 4.655745416106105e-06, "loss": 1.0379, "step": 2709 }, { "epoch": 2.051087984862819, "grad_norm": 2.069023609161377, "learning_rate": 4.648962696019928e-06, "loss": 1.0808, "step": 2710 }, { "epoch": 2.051844843897824, "grad_norm": 1.974176287651062, "learning_rate": 4.6421834257753745e-06, "loss": 1.1431, "step": 2711 }, { "epoch": 2.052601702932829, "grad_norm": 2.0162038803100586, "learning_rate": 4.635407609742265e-06, "loss": 1.0715, "step": 2712 }, { "epoch": 2.0533585619678334, "grad_norm": 2.0583693981170654, "learning_rate": 4.628635252288178e-06, "loss": 1.0583, "step": 2713 }, { "epoch": 2.0541154210028383, "grad_norm": 1.934477686882019, "learning_rate": 4.621866357778479e-06, "loss": 1.071, "step": 2714 }, { "epoch": 2.054872280037843, "grad_norm": 2.2252588272094727, "learning_rate": 4.61510093057629e-06, "loss": 1.1205, "step": 2715 }, { "epoch": 2.0556291390728476, "grad_norm": 2.0399527549743652, "learning_rate": 4.608338975042509e-06, "loss": 1.1, "step": 2716 }, { "epoch": 2.0563859981078525, "grad_norm": 2.1194961071014404, "learning_rate": 4.601580495535781e-06, "loss": 1.0566, "step": 2717 }, { "epoch": 2.057142857142857, "grad_norm": 2.1798765659332275, "learning_rate": 4.594825496412527e-06, "loss": 1.07, "step": 2718 }, { "epoch": 2.057899716177862, "grad_norm": 2.00516414642334, "learning_rate": 4.588073982026908e-06, "loss": 1.0404, "step": 2719 }, { "epoch": 2.0586565752128667, "grad_norm": 2.1549298763275146, "learning_rate": 4.581325956730851e-06, "loss": 1.0873, "step": 2720 }, { "epoch": 2.059413434247871, "grad_norm": 2.3754074573516846, "learning_rate": 4.574581424874031e-06, "loss": 1.0917, "step": 2721 }, { "epoch": 2.060170293282876, "grad_norm": 2.118363857269287, "learning_rate": 4.56784039080387e-06, "loss": 1.0864, "step": 2722 }, { "epoch": 2.060927152317881, "grad_norm": 1.9879770278930664, "learning_rate": 4.561102858865542e-06, "loss": 1.047, "step": 2723 }, { "epoch": 2.0616840113528854, "grad_norm": 2.0962250232696533, "learning_rate": 4.554368833401944e-06, "loss": 1.0803, "step": 2724 }, { "epoch": 2.0624408703878903, "grad_norm": 2.095574378967285, "learning_rate": 4.547638318753733e-06, "loss": 1.1101, "step": 2725 }, { "epoch": 2.063197729422895, "grad_norm": 2.2542734146118164, "learning_rate": 4.540911319259297e-06, "loss": 1.0672, "step": 2726 }, { "epoch": 2.0639545884578996, "grad_norm": 2.1071441173553467, "learning_rate": 4.534187839254755e-06, "loss": 1.0295, "step": 2727 }, { "epoch": 2.0647114474929045, "grad_norm": 2.2289743423461914, "learning_rate": 4.527467883073962e-06, "loss": 1.0759, "step": 2728 }, { "epoch": 2.0654683065279094, "grad_norm": 2.20210862159729, "learning_rate": 4.520751455048502e-06, "loss": 1.0952, "step": 2729 }, { "epoch": 2.066225165562914, "grad_norm": 2.0890111923217773, "learning_rate": 4.5140385595076795e-06, "loss": 1.1055, "step": 2730 }, { "epoch": 2.0669820245979187, "grad_norm": 2.0093884468078613, "learning_rate": 4.507329200778518e-06, "loss": 1.0836, "step": 2731 }, { "epoch": 2.0677388836329236, "grad_norm": 2.171649932861328, "learning_rate": 4.500623383185774e-06, "loss": 1.0794, "step": 2732 }, { "epoch": 2.068495742667928, "grad_norm": 1.8796758651733398, "learning_rate": 4.493921111051916e-06, "loss": 1.084, "step": 2733 }, { "epoch": 2.069252601702933, "grad_norm": 2.1491153240203857, "learning_rate": 4.487222388697128e-06, "loss": 1.0629, "step": 2734 }, { "epoch": 2.0700094607379373, "grad_norm": 2.3227274417877197, "learning_rate": 4.4805272204392965e-06, "loss": 1.0901, "step": 2735 }, { "epoch": 2.0707663197729422, "grad_norm": 2.256610631942749, "learning_rate": 4.47383561059403e-06, "loss": 1.1046, "step": 2736 }, { "epoch": 2.071523178807947, "grad_norm": 1.9754210710525513, "learning_rate": 4.467147563474642e-06, "loss": 1.0433, "step": 2737 }, { "epoch": 2.0722800378429516, "grad_norm": 2.2307772636413574, "learning_rate": 4.460463083392139e-06, "loss": 1.079, "step": 2738 }, { "epoch": 2.0730368968779564, "grad_norm": 1.8400083780288696, "learning_rate": 4.453782174655236e-06, "loss": 1.0615, "step": 2739 }, { "epoch": 2.0737937559129613, "grad_norm": 1.9424253702163696, "learning_rate": 4.447104841570351e-06, "loss": 1.0318, "step": 2740 }, { "epoch": 2.0745506149479658, "grad_norm": 2.008769989013672, "learning_rate": 4.440431088441582e-06, "loss": 1.0861, "step": 2741 }, { "epoch": 2.0753074739829707, "grad_norm": 2.024463415145874, "learning_rate": 4.4337609195707325e-06, "loss": 1.0406, "step": 2742 }, { "epoch": 2.0760643330179755, "grad_norm": 2.0557620525360107, "learning_rate": 4.4270943392572924e-06, "loss": 1.0983, "step": 2743 }, { "epoch": 2.07682119205298, "grad_norm": 2.7445693016052246, "learning_rate": 4.420431351798441e-06, "loss": 1.1253, "step": 2744 }, { "epoch": 2.077578051087985, "grad_norm": 2.1181790828704834, "learning_rate": 4.413771961489035e-06, "loss": 1.0808, "step": 2745 }, { "epoch": 2.0783349101229898, "grad_norm": 2.035220146179199, "learning_rate": 4.4071161726216116e-06, "loss": 1.0242, "step": 2746 }, { "epoch": 2.079091769157994, "grad_norm": 2.0690808296203613, "learning_rate": 4.4004639894863945e-06, "loss": 1.072, "step": 2747 }, { "epoch": 2.079848628192999, "grad_norm": 2.145026683807373, "learning_rate": 4.39381541637128e-06, "loss": 1.0455, "step": 2748 }, { "epoch": 2.080605487228004, "grad_norm": 1.9162312746047974, "learning_rate": 4.387170457561837e-06, "loss": 1.073, "step": 2749 }, { "epoch": 2.0813623462630084, "grad_norm": 2.1280105113983154, "learning_rate": 4.380529117341305e-06, "loss": 1.151, "step": 2750 }, { "epoch": 2.0821192052980133, "grad_norm": 2.1977860927581787, "learning_rate": 4.373891399990595e-06, "loss": 1.0732, "step": 2751 }, { "epoch": 2.0828760643330178, "grad_norm": 2.101363182067871, "learning_rate": 4.367257309788268e-06, "loss": 1.0534, "step": 2752 }, { "epoch": 2.0836329233680226, "grad_norm": 2.0512733459472656, "learning_rate": 4.360626851010562e-06, "loss": 1.0613, "step": 2753 }, { "epoch": 2.0843897824030275, "grad_norm": 2.4588279724121094, "learning_rate": 4.35400002793137e-06, "loss": 1.058, "step": 2754 }, { "epoch": 2.085146641438032, "grad_norm": 2.0685465335845947, "learning_rate": 4.347376844822242e-06, "loss": 1.0532, "step": 2755 }, { "epoch": 2.085903500473037, "grad_norm": 2.1436235904693604, "learning_rate": 4.340757305952384e-06, "loss": 1.0618, "step": 2756 }, { "epoch": 2.0866603595080417, "grad_norm": 2.1337039470672607, "learning_rate": 4.334141415588644e-06, "loss": 1.0852, "step": 2757 }, { "epoch": 2.087417218543046, "grad_norm": 1.9831031560897827, "learning_rate": 4.3275291779955245e-06, "loss": 1.1146, "step": 2758 }, { "epoch": 2.088174077578051, "grad_norm": 2.2047150135040283, "learning_rate": 4.320920597435174e-06, "loss": 1.0817, "step": 2759 }, { "epoch": 2.088930936613056, "grad_norm": 2.065563201904297, "learning_rate": 4.3143156781673846e-06, "loss": 1.1424, "step": 2760 }, { "epoch": 2.0896877956480604, "grad_norm": 2.1660046577453613, "learning_rate": 4.307714424449583e-06, "loss": 1.1019, "step": 2761 }, { "epoch": 2.0904446546830653, "grad_norm": 2.0400032997131348, "learning_rate": 4.301116840536844e-06, "loss": 1.0345, "step": 2762 }, { "epoch": 2.09120151371807, "grad_norm": 1.9771476984024048, "learning_rate": 4.29452293068186e-06, "loss": 1.0576, "step": 2763 }, { "epoch": 2.0919583727530746, "grad_norm": 2.2237024307250977, "learning_rate": 4.287932699134973e-06, "loss": 1.0808, "step": 2764 }, { "epoch": 2.0927152317880795, "grad_norm": 2.0638787746429443, "learning_rate": 4.281346150144139e-06, "loss": 1.0315, "step": 2765 }, { "epoch": 2.0934720908230844, "grad_norm": 2.370335817337036, "learning_rate": 4.27476328795495e-06, "loss": 1.1243, "step": 2766 }, { "epoch": 2.094228949858089, "grad_norm": 2.069380521774292, "learning_rate": 4.268184116810623e-06, "loss": 1.0801, "step": 2767 }, { "epoch": 2.0949858088930937, "grad_norm": 2.310030221939087, "learning_rate": 4.261608640951981e-06, "loss": 1.0666, "step": 2768 }, { "epoch": 2.0957426679280986, "grad_norm": 2.0185890197753906, "learning_rate": 4.255036864617483e-06, "loss": 1.0817, "step": 2769 }, { "epoch": 2.096499526963103, "grad_norm": 2.0323379039764404, "learning_rate": 4.248468792043194e-06, "loss": 1.057, "step": 2770 }, { "epoch": 2.097256385998108, "grad_norm": 2.362914562225342, "learning_rate": 4.241904427462797e-06, "loss": 1.0846, "step": 2771 }, { "epoch": 2.0980132450331124, "grad_norm": 2.203740119934082, "learning_rate": 4.235343775107575e-06, "loss": 1.0565, "step": 2772 }, { "epoch": 2.0987701040681173, "grad_norm": 2.006248712539673, "learning_rate": 4.22878683920643e-06, "loss": 1.1263, "step": 2773 }, { "epoch": 2.099526963103122, "grad_norm": 2.120445489883423, "learning_rate": 4.222233623985858e-06, "loss": 1.0629, "step": 2774 }, { "epoch": 2.1002838221381266, "grad_norm": 2.015179395675659, "learning_rate": 4.2156841336699625e-06, "loss": 1.0304, "step": 2775 }, { "epoch": 2.1010406811731315, "grad_norm": 2.1381468772888184, "learning_rate": 4.209138372480447e-06, "loss": 1.0669, "step": 2776 }, { "epoch": 2.1017975402081364, "grad_norm": 2.2807891368865967, "learning_rate": 4.202596344636609e-06, "loss": 1.0635, "step": 2777 }, { "epoch": 2.102554399243141, "grad_norm": 2.1004843711853027, "learning_rate": 4.196058054355347e-06, "loss": 1.1306, "step": 2778 }, { "epoch": 2.1033112582781457, "grad_norm": 2.092963695526123, "learning_rate": 4.189523505851129e-06, "loss": 1.0561, "step": 2779 }, { "epoch": 2.1040681173131506, "grad_norm": 2.0627875328063965, "learning_rate": 4.1829927033360314e-06, "loss": 1.0671, "step": 2780 }, { "epoch": 2.104824976348155, "grad_norm": 2.0852344036102295, "learning_rate": 4.17646565101971e-06, "loss": 1.056, "step": 2781 }, { "epoch": 2.10558183538316, "grad_norm": 2.0920495986938477, "learning_rate": 4.1699423531094065e-06, "loss": 1.0415, "step": 2782 }, { "epoch": 2.106338694418165, "grad_norm": 2.3179705142974854, "learning_rate": 4.163422813809934e-06, "loss": 1.0648, "step": 2783 }, { "epoch": 2.1070955534531692, "grad_norm": 2.0878725051879883, "learning_rate": 4.156907037323696e-06, "loss": 1.0996, "step": 2784 }, { "epoch": 2.107852412488174, "grad_norm": 2.1616759300231934, "learning_rate": 4.1503950278506565e-06, "loss": 1.0473, "step": 2785 }, { "epoch": 2.108609271523179, "grad_norm": 2.314814805984497, "learning_rate": 4.1438867895883555e-06, "loss": 1.0717, "step": 2786 }, { "epoch": 2.1093661305581834, "grad_norm": 2.105376958847046, "learning_rate": 4.137382326731906e-06, "loss": 1.1177, "step": 2787 }, { "epoch": 2.1101229895931883, "grad_norm": 2.18996000289917, "learning_rate": 4.130881643473987e-06, "loss": 1.0923, "step": 2788 }, { "epoch": 2.1108798486281932, "grad_norm": 2.0627288818359375, "learning_rate": 4.124384744004844e-06, "loss": 1.0307, "step": 2789 }, { "epoch": 2.1116367076631977, "grad_norm": 2.2541861534118652, "learning_rate": 4.117891632512271e-06, "loss": 1.0543, "step": 2790 }, { "epoch": 2.1123935666982026, "grad_norm": 2.0544228553771973, "learning_rate": 4.111402313181631e-06, "loss": 1.0987, "step": 2791 }, { "epoch": 2.113150425733207, "grad_norm": 2.1496474742889404, "learning_rate": 4.1049167901958454e-06, "loss": 1.0422, "step": 2792 }, { "epoch": 2.113907284768212, "grad_norm": 2.1363749504089355, "learning_rate": 4.098435067735377e-06, "loss": 1.0371, "step": 2793 }, { "epoch": 2.1146641438032168, "grad_norm": 2.166128635406494, "learning_rate": 4.091957149978247e-06, "loss": 1.056, "step": 2794 }, { "epoch": 2.115421002838221, "grad_norm": 2.3086111545562744, "learning_rate": 4.085483041100028e-06, "loss": 1.0582, "step": 2795 }, { "epoch": 2.116177861873226, "grad_norm": 2.0368103981018066, "learning_rate": 4.079012745273822e-06, "loss": 0.9679, "step": 2796 }, { "epoch": 2.116934720908231, "grad_norm": 2.146679639816284, "learning_rate": 4.072546266670289e-06, "loss": 1.0472, "step": 2797 }, { "epoch": 2.1176915799432354, "grad_norm": 2.188101291656494, "learning_rate": 4.0660836094576215e-06, "loss": 1.1283, "step": 2798 }, { "epoch": 2.1184484389782403, "grad_norm": 2.099888563156128, "learning_rate": 4.059624777801554e-06, "loss": 1.0708, "step": 2799 }, { "epoch": 2.119205298013245, "grad_norm": 2.088252544403076, "learning_rate": 4.053169775865346e-06, "loss": 1.0619, "step": 2800 }, { "epoch": 2.1199621570482496, "grad_norm": 2.0278518199920654, "learning_rate": 4.046718607809791e-06, "loss": 1.0549, "step": 2801 }, { "epoch": 2.1207190160832545, "grad_norm": 1.9221056699752808, "learning_rate": 4.040271277793217e-06, "loss": 1.0776, "step": 2802 }, { "epoch": 2.1214758751182594, "grad_norm": 2.296339511871338, "learning_rate": 4.033827789971474e-06, "loss": 1.0686, "step": 2803 }, { "epoch": 2.122232734153264, "grad_norm": 2.1365742683410645, "learning_rate": 4.027388148497936e-06, "loss": 1.0812, "step": 2804 }, { "epoch": 2.1229895931882687, "grad_norm": 1.9683605432510376, "learning_rate": 4.020952357523498e-06, "loss": 1.0168, "step": 2805 }, { "epoch": 2.1237464522232736, "grad_norm": 2.0199337005615234, "learning_rate": 4.014520421196579e-06, "loss": 1.1035, "step": 2806 }, { "epoch": 2.124503311258278, "grad_norm": 2.0269358158111572, "learning_rate": 4.008092343663094e-06, "loss": 1.0973, "step": 2807 }, { "epoch": 2.125260170293283, "grad_norm": 2.286689519882202, "learning_rate": 4.001668129066491e-06, "loss": 1.0882, "step": 2808 }, { "epoch": 2.1260170293282874, "grad_norm": 2.257807731628418, "learning_rate": 3.995247781547721e-06, "loss": 1.0877, "step": 2809 }, { "epoch": 2.1267738883632923, "grad_norm": 2.049635171890259, "learning_rate": 3.98883130524524e-06, "loss": 1.0924, "step": 2810 }, { "epoch": 2.127530747398297, "grad_norm": 2.112349033355713, "learning_rate": 3.982418704295016e-06, "loss": 1.0931, "step": 2811 }, { "epoch": 2.1282876064333016, "grad_norm": 2.0468220710754395, "learning_rate": 3.9760099828305104e-06, "loss": 1.0842, "step": 2812 }, { "epoch": 2.1290444654683065, "grad_norm": 2.0540926456451416, "learning_rate": 3.969605144982682e-06, "loss": 1.0924, "step": 2813 }, { "epoch": 2.1298013245033114, "grad_norm": 2.1668741703033447, "learning_rate": 3.963204194879998e-06, "loss": 1.1271, "step": 2814 }, { "epoch": 2.130558183538316, "grad_norm": 1.9331365823745728, "learning_rate": 3.956807136648411e-06, "loss": 1.1066, "step": 2815 }, { "epoch": 2.1313150425733207, "grad_norm": 1.9183405637741089, "learning_rate": 3.950413974411367e-06, "loss": 1.1018, "step": 2816 }, { "epoch": 2.1320719016083256, "grad_norm": 1.9769048690795898, "learning_rate": 3.944024712289805e-06, "loss": 1.0565, "step": 2817 }, { "epoch": 2.13282876064333, "grad_norm": 1.961674690246582, "learning_rate": 3.93763935440214e-06, "loss": 1.0816, "step": 2818 }, { "epoch": 2.133585619678335, "grad_norm": 2.0646157264709473, "learning_rate": 3.931257904864283e-06, "loss": 1.0373, "step": 2819 }, { "epoch": 2.13434247871334, "grad_norm": 2.243910074234009, "learning_rate": 3.92488036778961e-06, "loss": 1.0423, "step": 2820 }, { "epoch": 2.1350993377483443, "grad_norm": 2.192121744155884, "learning_rate": 3.91850674728899e-06, "loss": 1.0402, "step": 2821 }, { "epoch": 2.135856196783349, "grad_norm": 2.0529327392578125, "learning_rate": 3.912137047470764e-06, "loss": 1.0676, "step": 2822 }, { "epoch": 2.136613055818354, "grad_norm": 2.1558024883270264, "learning_rate": 3.9057712724407366e-06, "loss": 1.06, "step": 2823 }, { "epoch": 2.1373699148533585, "grad_norm": 1.8848477602005005, "learning_rate": 3.899409426302193e-06, "loss": 1.0668, "step": 2824 }, { "epoch": 2.1381267738883634, "grad_norm": 2.0971271991729736, "learning_rate": 3.893051513155881e-06, "loss": 1.1362, "step": 2825 }, { "epoch": 2.138883632923368, "grad_norm": 2.3545618057250977, "learning_rate": 3.88669753710002e-06, "loss": 1.1014, "step": 2826 }, { "epoch": 2.1396404919583727, "grad_norm": 2.0267715454101562, "learning_rate": 3.880347502230277e-06, "loss": 1.0955, "step": 2827 }, { "epoch": 2.1403973509933776, "grad_norm": 2.065638780593872, "learning_rate": 3.874001412639796e-06, "loss": 1.0732, "step": 2828 }, { "epoch": 2.141154210028382, "grad_norm": 2.221348285675049, "learning_rate": 3.867659272419163e-06, "loss": 1.0891, "step": 2829 }, { "epoch": 2.141911069063387, "grad_norm": 2.0352323055267334, "learning_rate": 3.861321085656425e-06, "loss": 1.0615, "step": 2830 }, { "epoch": 2.142667928098392, "grad_norm": 2.294567584991455, "learning_rate": 3.854986856437086e-06, "loss": 1.0886, "step": 2831 }, { "epoch": 2.1434247871333962, "grad_norm": 2.132350444793701, "learning_rate": 3.848656588844089e-06, "loss": 1.0932, "step": 2832 }, { "epoch": 2.144181646168401, "grad_norm": 2.0099170207977295, "learning_rate": 3.842330286957837e-06, "loss": 1.1081, "step": 2833 }, { "epoch": 2.144938505203406, "grad_norm": 2.181610584259033, "learning_rate": 3.836007954856154e-06, "loss": 1.1125, "step": 2834 }, { "epoch": 2.1456953642384105, "grad_norm": 2.204340934753418, "learning_rate": 3.829689596614324e-06, "loss": 1.1074, "step": 2835 }, { "epoch": 2.1464522232734153, "grad_norm": 2.0872256755828857, "learning_rate": 3.823375216305066e-06, "loss": 1.0709, "step": 2836 }, { "epoch": 2.1472090823084202, "grad_norm": 2.192131757736206, "learning_rate": 3.8170648179985324e-06, "loss": 1.0539, "step": 2837 }, { "epoch": 2.1479659413434247, "grad_norm": 1.9136378765106201, "learning_rate": 3.810758405762311e-06, "loss": 1.0931, "step": 2838 }, { "epoch": 2.1487228003784296, "grad_norm": 2.390619993209839, "learning_rate": 3.8044559836614203e-06, "loss": 1.0645, "step": 2839 }, { "epoch": 2.1494796594134344, "grad_norm": 2.138697862625122, "learning_rate": 3.798157555758304e-06, "loss": 1.084, "step": 2840 }, { "epoch": 2.150236518448439, "grad_norm": 1.9692342281341553, "learning_rate": 3.791863126112828e-06, "loss": 1.0735, "step": 2841 }, { "epoch": 2.1509933774834438, "grad_norm": 2.02774715423584, "learning_rate": 3.78557269878229e-06, "loss": 1.0853, "step": 2842 }, { "epoch": 2.151750236518448, "grad_norm": 1.9480324983596802, "learning_rate": 3.779286277821402e-06, "loss": 1.0504, "step": 2843 }, { "epoch": 2.152507095553453, "grad_norm": 2.0497875213623047, "learning_rate": 3.773003867282301e-06, "loss": 1.0475, "step": 2844 }, { "epoch": 2.153263954588458, "grad_norm": 2.0127995014190674, "learning_rate": 3.766725471214524e-06, "loss": 1.0773, "step": 2845 }, { "epoch": 2.1540208136234624, "grad_norm": 1.920920729637146, "learning_rate": 3.760451093665034e-06, "loss": 1.0747, "step": 2846 }, { "epoch": 2.1547776726584673, "grad_norm": 1.9610087871551514, "learning_rate": 3.754180738678201e-06, "loss": 1.0929, "step": 2847 }, { "epoch": 2.155534531693472, "grad_norm": 2.1988742351531982, "learning_rate": 3.7479144102957955e-06, "loss": 1.0426, "step": 2848 }, { "epoch": 2.1562913907284766, "grad_norm": 2.0719704627990723, "learning_rate": 3.7416521125569987e-06, "loss": 1.0965, "step": 2849 }, { "epoch": 2.1570482497634815, "grad_norm": 2.073084592819214, "learning_rate": 3.7353938494983966e-06, "loss": 1.0428, "step": 2850 }, { "epoch": 2.1578051087984864, "grad_norm": 1.9803792238235474, "learning_rate": 3.729139625153964e-06, "loss": 1.0724, "step": 2851 }, { "epoch": 2.158561967833491, "grad_norm": 1.9936349391937256, "learning_rate": 3.72288944355508e-06, "loss": 1.0278, "step": 2852 }, { "epoch": 2.1593188268684957, "grad_norm": 2.1690564155578613, "learning_rate": 3.7166433087305177e-06, "loss": 1.044, "step": 2853 }, { "epoch": 2.1600756859035006, "grad_norm": 2.125483512878418, "learning_rate": 3.7104012247064436e-06, "loss": 1.0493, "step": 2854 }, { "epoch": 2.160832544938505, "grad_norm": 2.031766653060913, "learning_rate": 3.7041631955064067e-06, "loss": 1.0746, "step": 2855 }, { "epoch": 2.16158940397351, "grad_norm": 2.1385655403137207, "learning_rate": 3.697929225151341e-06, "loss": 1.0993, "step": 2856 }, { "epoch": 2.162346263008515, "grad_norm": 2.363760471343994, "learning_rate": 3.691699317659574e-06, "loss": 1.0544, "step": 2857 }, { "epoch": 2.1631031220435193, "grad_norm": 2.0311970710754395, "learning_rate": 3.685473477046807e-06, "loss": 1.0244, "step": 2858 }, { "epoch": 2.163859981078524, "grad_norm": 2.2926740646362305, "learning_rate": 3.679251707326123e-06, "loss": 1.0813, "step": 2859 }, { "epoch": 2.164616840113529, "grad_norm": 2.1094629764556885, "learning_rate": 3.6730340125079804e-06, "loss": 1.0729, "step": 2860 }, { "epoch": 2.1653736991485335, "grad_norm": 2.2575571537017822, "learning_rate": 3.6668203966002157e-06, "loss": 1.1031, "step": 2861 }, { "epoch": 2.1661305581835384, "grad_norm": 2.2293882369995117, "learning_rate": 3.660610863608018e-06, "loss": 1.0676, "step": 2862 }, { "epoch": 2.1668874172185433, "grad_norm": 2.214388132095337, "learning_rate": 3.6544054175339655e-06, "loss": 1.1499, "step": 2863 }, { "epoch": 2.1676442762535477, "grad_norm": 1.9468921422958374, "learning_rate": 3.6482040623779925e-06, "loss": 1.0726, "step": 2864 }, { "epoch": 2.1684011352885526, "grad_norm": 2.0682532787323, "learning_rate": 3.642006802137399e-06, "loss": 1.0695, "step": 2865 }, { "epoch": 2.169157994323557, "grad_norm": 2.1253714561462402, "learning_rate": 3.6358136408068475e-06, "loss": 1.116, "step": 2866 }, { "epoch": 2.169914853358562, "grad_norm": 2.113579511642456, "learning_rate": 3.6296245823783514e-06, "loss": 1.0874, "step": 2867 }, { "epoch": 2.170671712393567, "grad_norm": 1.9568238258361816, "learning_rate": 3.623439630841282e-06, "loss": 1.0363, "step": 2868 }, { "epoch": 2.1714285714285713, "grad_norm": 2.3202977180480957, "learning_rate": 3.6172587901823652e-06, "loss": 1.0881, "step": 2869 }, { "epoch": 2.172185430463576, "grad_norm": 2.232671022415161, "learning_rate": 3.611082064385679e-06, "loss": 1.0919, "step": 2870 }, { "epoch": 2.172942289498581, "grad_norm": 1.9573837518692017, "learning_rate": 3.6049094574326453e-06, "loss": 1.017, "step": 2871 }, { "epoch": 2.1736991485335855, "grad_norm": 2.110637664794922, "learning_rate": 3.598740973302036e-06, "loss": 1.1066, "step": 2872 }, { "epoch": 2.1744560075685904, "grad_norm": 2.0418527126312256, "learning_rate": 3.592576615969956e-06, "loss": 1.0607, "step": 2873 }, { "epoch": 2.1752128666035953, "grad_norm": 2.177208662033081, "learning_rate": 3.5864163894098624e-06, "loss": 1.0724, "step": 2874 }, { "epoch": 2.1759697256385997, "grad_norm": 2.2924139499664307, "learning_rate": 3.580260297592535e-06, "loss": 1.0593, "step": 2875 }, { "epoch": 2.1767265846736046, "grad_norm": 2.0772855281829834, "learning_rate": 3.574108344486102e-06, "loss": 1.0661, "step": 2876 }, { "epoch": 2.1774834437086095, "grad_norm": 2.265467405319214, "learning_rate": 3.5679605340560187e-06, "loss": 1.0569, "step": 2877 }, { "epoch": 2.178240302743614, "grad_norm": 2.154500722885132, "learning_rate": 3.5618168702650713e-06, "loss": 1.0332, "step": 2878 }, { "epoch": 2.178997161778619, "grad_norm": 2.0559258460998535, "learning_rate": 3.5556773570733666e-06, "loss": 1.0697, "step": 2879 }, { "epoch": 2.1797540208136237, "grad_norm": 2.4780728816986084, "learning_rate": 3.5495419984383452e-06, "loss": 1.0525, "step": 2880 }, { "epoch": 2.180510879848628, "grad_norm": 2.0388307571411133, "learning_rate": 3.543410798314767e-06, "loss": 1.0224, "step": 2881 }, { "epoch": 2.181267738883633, "grad_norm": 2.0700438022613525, "learning_rate": 3.5372837606547056e-06, "loss": 1.0795, "step": 2882 }, { "epoch": 2.1820245979186375, "grad_norm": 2.08799409866333, "learning_rate": 3.5311608894075606e-06, "loss": 1.1147, "step": 2883 }, { "epoch": 2.1827814569536423, "grad_norm": 2.04353928565979, "learning_rate": 3.5250421885200357e-06, "loss": 1.0693, "step": 2884 }, { "epoch": 2.1835383159886472, "grad_norm": 2.1684114933013916, "learning_rate": 3.5189276619361567e-06, "loss": 1.0844, "step": 2885 }, { "epoch": 2.1842951750236517, "grad_norm": 2.2811787128448486, "learning_rate": 3.5128173135972515e-06, "loss": 1.0921, "step": 2886 }, { "epoch": 2.1850520340586566, "grad_norm": 2.293611764907837, "learning_rate": 3.5067111474419603e-06, "loss": 1.1276, "step": 2887 }, { "epoch": 2.1858088930936614, "grad_norm": 1.9369990825653076, "learning_rate": 3.5006091674062263e-06, "loss": 1.0811, "step": 2888 }, { "epoch": 2.186565752128666, "grad_norm": 2.1612861156463623, "learning_rate": 3.494511377423291e-06, "loss": 1.0987, "step": 2889 }, { "epoch": 2.1873226111636708, "grad_norm": 2.301436424255371, "learning_rate": 3.488417781423691e-06, "loss": 1.1224, "step": 2890 }, { "epoch": 2.1880794701986757, "grad_norm": 2.149083375930786, "learning_rate": 3.482328383335271e-06, "loss": 1.0906, "step": 2891 }, { "epoch": 2.18883632923368, "grad_norm": 2.4687178134918213, "learning_rate": 3.4762431870831625e-06, "loss": 1.1381, "step": 2892 }, { "epoch": 2.189593188268685, "grad_norm": 2.131269693374634, "learning_rate": 3.4701621965897906e-06, "loss": 1.0644, "step": 2893 }, { "epoch": 2.19035004730369, "grad_norm": 2.0854032039642334, "learning_rate": 3.464085415774874e-06, "loss": 1.0703, "step": 2894 }, { "epoch": 2.1911069063386943, "grad_norm": 1.988800287246704, "learning_rate": 3.458012848555407e-06, "loss": 1.0925, "step": 2895 }, { "epoch": 2.191863765373699, "grad_norm": 2.0683155059814453, "learning_rate": 3.451944498845673e-06, "loss": 1.1212, "step": 2896 }, { "epoch": 2.192620624408704, "grad_norm": 2.1208488941192627, "learning_rate": 3.4458803705572385e-06, "loss": 1.0917, "step": 2897 }, { "epoch": 2.1933774834437085, "grad_norm": 1.9864528179168701, "learning_rate": 3.4398204675989504e-06, "loss": 1.1095, "step": 2898 }, { "epoch": 2.1941343424787134, "grad_norm": 2.0708682537078857, "learning_rate": 3.4337647938769283e-06, "loss": 1.0989, "step": 2899 }, { "epoch": 2.194891201513718, "grad_norm": 2.229597330093384, "learning_rate": 3.4277133532945704e-06, "loss": 1.1137, "step": 2900 }, { "epoch": 2.1956480605487227, "grad_norm": 2.039870500564575, "learning_rate": 3.4216661497525372e-06, "loss": 1.0866, "step": 2901 }, { "epoch": 2.1964049195837276, "grad_norm": 2.037367582321167, "learning_rate": 3.4156231871487706e-06, "loss": 1.0947, "step": 2902 }, { "epoch": 2.197161778618732, "grad_norm": 2.3312087059020996, "learning_rate": 3.4095844693784647e-06, "loss": 1.0883, "step": 2903 }, { "epoch": 2.197918637653737, "grad_norm": 2.1165080070495605, "learning_rate": 3.4035500003340886e-06, "loss": 1.07, "step": 2904 }, { "epoch": 2.198675496688742, "grad_norm": 2.1637613773345947, "learning_rate": 3.3975197839053727e-06, "loss": 1.0012, "step": 2905 }, { "epoch": 2.1994323557237463, "grad_norm": 2.1280291080474854, "learning_rate": 3.3914938239792956e-06, "loss": 1.0525, "step": 2906 }, { "epoch": 2.200189214758751, "grad_norm": 2.1883440017700195, "learning_rate": 3.385472124440102e-06, "loss": 1.0466, "step": 2907 }, { "epoch": 2.200946073793756, "grad_norm": 2.120882272720337, "learning_rate": 3.3794546891692883e-06, "loss": 1.0268, "step": 2908 }, { "epoch": 2.2017029328287605, "grad_norm": 2.141380786895752, "learning_rate": 3.3734415220456036e-06, "loss": 1.0695, "step": 2909 }, { "epoch": 2.2024597918637654, "grad_norm": 1.9636356830596924, "learning_rate": 3.3674326269450386e-06, "loss": 1.035, "step": 2910 }, { "epoch": 2.2032166508987703, "grad_norm": 2.227339506149292, "learning_rate": 3.361428007740842e-06, "loss": 1.1143, "step": 2911 }, { "epoch": 2.2039735099337747, "grad_norm": 2.206693172454834, "learning_rate": 3.3554276683034933e-06, "loss": 1.075, "step": 2912 }, { "epoch": 2.2047303689687796, "grad_norm": 2.3205721378326416, "learning_rate": 3.349431612500721e-06, "loss": 1.0599, "step": 2913 }, { "epoch": 2.2054872280037845, "grad_norm": 2.0222678184509277, "learning_rate": 3.343439844197493e-06, "loss": 1.083, "step": 2914 }, { "epoch": 2.206244087038789, "grad_norm": 2.076840400695801, "learning_rate": 3.337452367256012e-06, "loss": 1.0306, "step": 2915 }, { "epoch": 2.207000946073794, "grad_norm": 1.9618786573410034, "learning_rate": 3.3314691855357197e-06, "loss": 1.1021, "step": 2916 }, { "epoch": 2.2077578051087983, "grad_norm": 2.168519973754883, "learning_rate": 3.3254903028932716e-06, "loss": 1.007, "step": 2917 }, { "epoch": 2.208514664143803, "grad_norm": 2.0127992630004883, "learning_rate": 3.3195157231825704e-06, "loss": 1.0797, "step": 2918 }, { "epoch": 2.209271523178808, "grad_norm": 2.0020880699157715, "learning_rate": 3.3135454502547397e-06, "loss": 1.1154, "step": 2919 }, { "epoch": 2.2100283822138125, "grad_norm": 1.9836198091506958, "learning_rate": 3.307579487958125e-06, "loss": 1.0418, "step": 2920 }, { "epoch": 2.2107852412488174, "grad_norm": 1.9691238403320312, "learning_rate": 3.3016178401382957e-06, "loss": 1.1094, "step": 2921 }, { "epoch": 2.2115421002838223, "grad_norm": 2.1438305377960205, "learning_rate": 3.2956605106380464e-06, "loss": 1.0935, "step": 2922 }, { "epoch": 2.2122989593188267, "grad_norm": 2.1357624530792236, "learning_rate": 3.2897075032973656e-06, "loss": 1.1033, "step": 2923 }, { "epoch": 2.2130558183538316, "grad_norm": 2.027420997619629, "learning_rate": 3.28375882195348e-06, "loss": 1.0502, "step": 2924 }, { "epoch": 2.2138126773888365, "grad_norm": 2.073096513748169, "learning_rate": 3.2778144704408167e-06, "loss": 1.0565, "step": 2925 }, { "epoch": 2.214569536423841, "grad_norm": 2.12164306640625, "learning_rate": 3.271874452591015e-06, "loss": 1.103, "step": 2926 }, { "epoch": 2.215326395458846, "grad_norm": 2.0933268070220947, "learning_rate": 3.2659387722329226e-06, "loss": 1.0776, "step": 2927 }, { "epoch": 2.2160832544938507, "grad_norm": 2.033733367919922, "learning_rate": 3.2600074331925834e-06, "loss": 1.0642, "step": 2928 }, { "epoch": 2.216840113528855, "grad_norm": 1.951857328414917, "learning_rate": 3.2540804392932527e-06, "loss": 1.0956, "step": 2929 }, { "epoch": 2.21759697256386, "grad_norm": 2.0612125396728516, "learning_rate": 3.2481577943553766e-06, "loss": 1.0756, "step": 2930 }, { "epoch": 2.218353831598865, "grad_norm": 1.9757081270217896, "learning_rate": 3.2422395021966006e-06, "loss": 1.0937, "step": 2931 }, { "epoch": 2.2191106906338693, "grad_norm": 1.9480013847351074, "learning_rate": 3.2363255666317706e-06, "loss": 1.0986, "step": 2932 }, { "epoch": 2.2198675496688742, "grad_norm": 2.087038040161133, "learning_rate": 3.2304159914729194e-06, "loss": 1.0518, "step": 2933 }, { "epoch": 2.2206244087038787, "grad_norm": 2.119804859161377, "learning_rate": 3.2245107805292625e-06, "loss": 1.0356, "step": 2934 }, { "epoch": 2.2213812677388836, "grad_norm": 2.2135863304138184, "learning_rate": 3.2186099376072133e-06, "loss": 1.0988, "step": 2935 }, { "epoch": 2.2221381267738884, "grad_norm": 1.9379045963287354, "learning_rate": 3.2127134665103684e-06, "loss": 1.0762, "step": 2936 }, { "epoch": 2.222894985808893, "grad_norm": 2.078213691711426, "learning_rate": 3.206821371039495e-06, "loss": 1.1031, "step": 2937 }, { "epoch": 2.2236518448438978, "grad_norm": 1.7914735078811646, "learning_rate": 3.2009336549925558e-06, "loss": 1.0699, "step": 2938 }, { "epoch": 2.2244087038789027, "grad_norm": 2.0808887481689453, "learning_rate": 3.195050322164676e-06, "loss": 1.0362, "step": 2939 }, { "epoch": 2.225165562913907, "grad_norm": 2.2079849243164062, "learning_rate": 3.1891713763481664e-06, "loss": 1.0544, "step": 2940 }, { "epoch": 2.225922421948912, "grad_norm": 2.137425184249878, "learning_rate": 3.1832968213325056e-06, "loss": 1.0465, "step": 2941 }, { "epoch": 2.226679280983917, "grad_norm": 2.0021207332611084, "learning_rate": 3.177426660904339e-06, "loss": 1.0756, "step": 2942 }, { "epoch": 2.2274361400189213, "grad_norm": 2.105543851852417, "learning_rate": 3.1715608988474904e-06, "loss": 1.0771, "step": 2943 }, { "epoch": 2.228192999053926, "grad_norm": 1.925067663192749, "learning_rate": 3.1656995389429347e-06, "loss": 1.0919, "step": 2944 }, { "epoch": 2.228949858088931, "grad_norm": 2.076474905014038, "learning_rate": 3.159842584968813e-06, "loss": 1.0684, "step": 2945 }, { "epoch": 2.2297067171239355, "grad_norm": 2.0177693367004395, "learning_rate": 3.15399004070043e-06, "loss": 1.1303, "step": 2946 }, { "epoch": 2.2304635761589404, "grad_norm": 1.9716448783874512, "learning_rate": 3.1481419099102477e-06, "loss": 1.0174, "step": 2947 }, { "epoch": 2.2312204351939453, "grad_norm": 2.0462799072265625, "learning_rate": 3.1422981963678823e-06, "loss": 1.0466, "step": 2948 }, { "epoch": 2.2319772942289497, "grad_norm": 2.242594003677368, "learning_rate": 3.1364589038401055e-06, "loss": 1.0786, "step": 2949 }, { "epoch": 2.2327341532639546, "grad_norm": 2.011847972869873, "learning_rate": 3.1306240360908325e-06, "loss": 1.0735, "step": 2950 }, { "epoch": 2.2334910122989595, "grad_norm": 1.8826643228530884, "learning_rate": 3.124793596881128e-06, "loss": 1.105, "step": 2951 }, { "epoch": 2.234247871333964, "grad_norm": 2.173900604248047, "learning_rate": 3.118967589969205e-06, "loss": 1.0665, "step": 2952 }, { "epoch": 2.235004730368969, "grad_norm": 2.065894365310669, "learning_rate": 3.1131460191104214e-06, "loss": 1.1092, "step": 2953 }, { "epoch": 2.2357615894039737, "grad_norm": 2.1508965492248535, "learning_rate": 3.107328888057271e-06, "loss": 1.0692, "step": 2954 }, { "epoch": 2.236518448438978, "grad_norm": 2.2163479328155518, "learning_rate": 3.1015162005593918e-06, "loss": 1.1466, "step": 2955 }, { "epoch": 2.237275307473983, "grad_norm": 2.0771398544311523, "learning_rate": 3.095707960363548e-06, "loss": 1.1392, "step": 2956 }, { "epoch": 2.2380321665089875, "grad_norm": 2.0793957710266113, "learning_rate": 3.0899041712136474e-06, "loss": 1.056, "step": 2957 }, { "epoch": 2.2387890255439924, "grad_norm": 2.296447277069092, "learning_rate": 3.084104836850719e-06, "loss": 1.0637, "step": 2958 }, { "epoch": 2.2395458845789973, "grad_norm": 2.2935092449188232, "learning_rate": 3.0783099610129273e-06, "loss": 1.0821, "step": 2959 }, { "epoch": 2.2403027436140017, "grad_norm": 2.17787766456604, "learning_rate": 3.0725195474355648e-06, "loss": 1.0933, "step": 2960 }, { "epoch": 2.2410596026490066, "grad_norm": 2.0003387928009033, "learning_rate": 3.066733599851038e-06, "loss": 1.0357, "step": 2961 }, { "epoch": 2.2418164616840115, "grad_norm": 2.4547882080078125, "learning_rate": 3.060952121988881e-06, "loss": 1.1288, "step": 2962 }, { "epoch": 2.242573320719016, "grad_norm": 2.319946765899658, "learning_rate": 3.055175117575754e-06, "loss": 1.09, "step": 2963 }, { "epoch": 2.243330179754021, "grad_norm": 2.2627015113830566, "learning_rate": 3.049402590335415e-06, "loss": 1.1238, "step": 2964 }, { "epoch": 2.2440870387890257, "grad_norm": 2.032540798187256, "learning_rate": 3.043634543988752e-06, "loss": 1.0622, "step": 2965 }, { "epoch": 2.24484389782403, "grad_norm": 2.041095495223999, "learning_rate": 3.037870982253763e-06, "loss": 1.0729, "step": 2966 }, { "epoch": 2.245600756859035, "grad_norm": 2.079834222793579, "learning_rate": 3.032111908845547e-06, "loss": 1.0541, "step": 2967 }, { "epoch": 2.24635761589404, "grad_norm": 1.9280726909637451, "learning_rate": 3.0263573274763165e-06, "loss": 1.0715, "step": 2968 }, { "epoch": 2.2471144749290444, "grad_norm": 1.9036996364593506, "learning_rate": 3.0206072418553854e-06, "loss": 1.0914, "step": 2969 }, { "epoch": 2.2478713339640493, "grad_norm": 1.9973169565200806, "learning_rate": 3.0148616556891774e-06, "loss": 1.0612, "step": 2970 }, { "epoch": 2.248628192999054, "grad_norm": 2.048168420791626, "learning_rate": 3.009120572681206e-06, "loss": 1.0573, "step": 2971 }, { "epoch": 2.2493850520340586, "grad_norm": 1.9385312795639038, "learning_rate": 3.0033839965320797e-06, "loss": 1.0503, "step": 2972 }, { "epoch": 2.2501419110690635, "grad_norm": 1.8759933710098267, "learning_rate": 2.9976519309395154e-06, "loss": 1.0739, "step": 2973 }, { "epoch": 2.250898770104068, "grad_norm": 2.2850966453552246, "learning_rate": 2.9919243795983116e-06, "loss": 1.0669, "step": 2974 }, { "epoch": 2.251655629139073, "grad_norm": 2.017787218093872, "learning_rate": 2.9862013462003634e-06, "loss": 1.0993, "step": 2975 }, { "epoch": 2.2524124881740777, "grad_norm": 1.9540081024169922, "learning_rate": 2.980482834434648e-06, "loss": 1.116, "step": 2976 }, { "epoch": 2.253169347209082, "grad_norm": 2.056605577468872, "learning_rate": 2.974768847987239e-06, "loss": 1.0612, "step": 2977 }, { "epoch": 2.253926206244087, "grad_norm": 2.3890209197998047, "learning_rate": 2.969059390541273e-06, "loss": 1.0817, "step": 2978 }, { "epoch": 2.254683065279092, "grad_norm": 1.947478175163269, "learning_rate": 2.963354465776983e-06, "loss": 1.0236, "step": 2979 }, { "epoch": 2.2554399243140963, "grad_norm": 2.1459438800811768, "learning_rate": 2.9576540773716783e-06, "loss": 1.0725, "step": 2980 }, { "epoch": 2.2561967833491012, "grad_norm": 2.0048837661743164, "learning_rate": 2.9519582289997423e-06, "loss": 1.1, "step": 2981 }, { "epoch": 2.256953642384106, "grad_norm": 2.1805686950683594, "learning_rate": 2.9462669243326357e-06, "loss": 1.0963, "step": 2982 }, { "epoch": 2.2577105014191106, "grad_norm": 1.9871695041656494, "learning_rate": 2.9405801670388784e-06, "loss": 1.1184, "step": 2983 }, { "epoch": 2.2584673604541154, "grad_norm": 2.142199754714966, "learning_rate": 2.934897960784075e-06, "loss": 1.0546, "step": 2984 }, { "epoch": 2.2592242194891203, "grad_norm": 2.1177968978881836, "learning_rate": 2.9292203092308823e-06, "loss": 1.1136, "step": 2985 }, { "epoch": 2.2599810785241248, "grad_norm": 2.1006743907928467, "learning_rate": 2.9235472160390315e-06, "loss": 1.0091, "step": 2986 }, { "epoch": 2.2607379375591297, "grad_norm": 1.978402853012085, "learning_rate": 2.917878684865312e-06, "loss": 1.0677, "step": 2987 }, { "epoch": 2.2614947965941345, "grad_norm": 2.2147839069366455, "learning_rate": 2.9122147193635757e-06, "loss": 1.0809, "step": 2988 }, { "epoch": 2.262251655629139, "grad_norm": 2.2743515968322754, "learning_rate": 2.9065553231847215e-06, "loss": 1.0647, "step": 2989 }, { "epoch": 2.263008514664144, "grad_norm": 2.038224935531616, "learning_rate": 2.900900499976714e-06, "loss": 1.0953, "step": 2990 }, { "epoch": 2.2637653736991483, "grad_norm": 1.9364351034164429, "learning_rate": 2.895250253384567e-06, "loss": 1.0241, "step": 2991 }, { "epoch": 2.264522232734153, "grad_norm": 2.0556015968322754, "learning_rate": 2.8896045870503405e-06, "loss": 1.0358, "step": 2992 }, { "epoch": 2.265279091769158, "grad_norm": 2.6211061477661133, "learning_rate": 2.8839635046131477e-06, "loss": 1.058, "step": 2993 }, { "epoch": 2.2660359508041625, "grad_norm": 2.0403685569763184, "learning_rate": 2.87832700970914e-06, "loss": 1.0957, "step": 2994 }, { "epoch": 2.2667928098391674, "grad_norm": 2.106076955795288, "learning_rate": 2.8726951059715184e-06, "loss": 1.0622, "step": 2995 }, { "epoch": 2.2675496688741723, "grad_norm": 1.878516674041748, "learning_rate": 2.867067797030522e-06, "loss": 1.0636, "step": 2996 }, { "epoch": 2.2683065279091768, "grad_norm": 2.178928852081299, "learning_rate": 2.861445086513431e-06, "loss": 1.0347, "step": 2997 }, { "epoch": 2.2690633869441816, "grad_norm": 2.5624477863311768, "learning_rate": 2.855826978044558e-06, "loss": 1.0171, "step": 2998 }, { "epoch": 2.2698202459791865, "grad_norm": 1.9493463039398193, "learning_rate": 2.8502134752452488e-06, "loss": 1.0763, "step": 2999 }, { "epoch": 2.270577105014191, "grad_norm": 1.9162508249282837, "learning_rate": 2.844604581733879e-06, "loss": 1.1071, "step": 3000 }, { "epoch": 2.271333964049196, "grad_norm": 2.097134828567505, "learning_rate": 2.8390003011258576e-06, "loss": 1.0987, "step": 3001 }, { "epoch": 2.2720908230842007, "grad_norm": 2.138456106185913, "learning_rate": 2.83340063703362e-06, "loss": 1.0962, "step": 3002 }, { "epoch": 2.272847682119205, "grad_norm": 2.057185411453247, "learning_rate": 2.8278055930666243e-06, "loss": 1.0849, "step": 3003 }, { "epoch": 2.27360454115421, "grad_norm": 2.094721555709839, "learning_rate": 2.822215172831354e-06, "loss": 1.1004, "step": 3004 }, { "epoch": 2.274361400189215, "grad_norm": 2.1358296871185303, "learning_rate": 2.8166293799312994e-06, "loss": 1.0583, "step": 3005 }, { "epoch": 2.2751182592242194, "grad_norm": 1.955474615097046, "learning_rate": 2.8110482179669823e-06, "loss": 1.0828, "step": 3006 }, { "epoch": 2.2758751182592243, "grad_norm": 2.084411144256592, "learning_rate": 2.805471690535935e-06, "loss": 1.0635, "step": 3007 }, { "epoch": 2.2766319772942287, "grad_norm": 2.111748218536377, "learning_rate": 2.799899801232702e-06, "loss": 1.0604, "step": 3008 }, { "epoch": 2.2773888363292336, "grad_norm": 2.2352616786956787, "learning_rate": 2.7943325536488373e-06, "loss": 1.1397, "step": 3009 }, { "epoch": 2.2781456953642385, "grad_norm": 2.1407878398895264, "learning_rate": 2.788769951372908e-06, "loss": 1.083, "step": 3010 }, { "epoch": 2.2789025543992434, "grad_norm": 2.0809216499328613, "learning_rate": 2.7832119979904798e-06, "loss": 1.0496, "step": 3011 }, { "epoch": 2.279659413434248, "grad_norm": 2.0093045234680176, "learning_rate": 2.77765869708412e-06, "loss": 1.0543, "step": 3012 }, { "epoch": 2.2804162724692527, "grad_norm": 1.9954379796981812, "learning_rate": 2.7721100522334056e-06, "loss": 1.1095, "step": 3013 }, { "epoch": 2.281173131504257, "grad_norm": 2.1180033683776855, "learning_rate": 2.7665660670149092e-06, "loss": 1.0982, "step": 3014 }, { "epoch": 2.281929990539262, "grad_norm": 2.0234453678131104, "learning_rate": 2.761026745002201e-06, "loss": 1.0808, "step": 3015 }, { "epoch": 2.282686849574267, "grad_norm": 2.1585819721221924, "learning_rate": 2.7554920897658386e-06, "loss": 1.0775, "step": 3016 }, { "epoch": 2.2834437086092714, "grad_norm": 1.9864180088043213, "learning_rate": 2.7499621048733775e-06, "loss": 1.1177, "step": 3017 }, { "epoch": 2.2842005676442763, "grad_norm": 2.359938621520996, "learning_rate": 2.744436793889368e-06, "loss": 1.0951, "step": 3018 }, { "epoch": 2.284957426679281, "grad_norm": 2.0253729820251465, "learning_rate": 2.7389161603753312e-06, "loss": 1.0414, "step": 3019 }, { "epoch": 2.2857142857142856, "grad_norm": 1.9887871742248535, "learning_rate": 2.73340020788979e-06, "loss": 1.0882, "step": 3020 }, { "epoch": 2.2864711447492905, "grad_norm": 1.971255898475647, "learning_rate": 2.7278889399882435e-06, "loss": 1.0867, "step": 3021 }, { "epoch": 2.2872280037842954, "grad_norm": 2.0510776042938232, "learning_rate": 2.7223823602231664e-06, "loss": 1.0738, "step": 3022 }, { "epoch": 2.2879848628193, "grad_norm": 1.953727126121521, "learning_rate": 2.7168804721440177e-06, "loss": 1.0262, "step": 3023 }, { "epoch": 2.2887417218543047, "grad_norm": 2.0659303665161133, "learning_rate": 2.7113832792972323e-06, "loss": 1.0748, "step": 3024 }, { "epoch": 2.289498580889309, "grad_norm": 2.147465229034424, "learning_rate": 2.705890785226219e-06, "loss": 1.0691, "step": 3025 }, { "epoch": 2.290255439924314, "grad_norm": 2.1003715991973877, "learning_rate": 2.7004029934713516e-06, "loss": 1.0623, "step": 3026 }, { "epoch": 2.291012298959319, "grad_norm": 1.9521448612213135, "learning_rate": 2.6949199075699754e-06, "loss": 1.1025, "step": 3027 }, { "epoch": 2.291769157994324, "grad_norm": 1.998204231262207, "learning_rate": 2.689441531056408e-06, "loss": 1.0875, "step": 3028 }, { "epoch": 2.2925260170293282, "grad_norm": 1.98150634765625, "learning_rate": 2.683967867461925e-06, "loss": 1.1051, "step": 3029 }, { "epoch": 2.293282876064333, "grad_norm": 2.0418317317962646, "learning_rate": 2.678498920314767e-06, "loss": 1.0871, "step": 3030 }, { "epoch": 2.2940397350993376, "grad_norm": 2.075697898864746, "learning_rate": 2.673034693140136e-06, "loss": 1.1588, "step": 3031 }, { "epoch": 2.2947965941343424, "grad_norm": 2.049619197845459, "learning_rate": 2.6675751894601928e-06, "loss": 1.1, "step": 3032 }, { "epoch": 2.2955534531693473, "grad_norm": 1.9731786251068115, "learning_rate": 2.6621204127940403e-06, "loss": 1.0883, "step": 3033 }, { "epoch": 2.2963103122043518, "grad_norm": 2.0121383666992188, "learning_rate": 2.656670366657748e-06, "loss": 1.0914, "step": 3034 }, { "epoch": 2.2970671712393567, "grad_norm": 2.0904853343963623, "learning_rate": 2.651225054564334e-06, "loss": 1.0803, "step": 3035 }, { "epoch": 2.2978240302743616, "grad_norm": 2.0923306941986084, "learning_rate": 2.645784480023764e-06, "loss": 1.0528, "step": 3036 }, { "epoch": 2.298580889309366, "grad_norm": 2.1617391109466553, "learning_rate": 2.6403486465429524e-06, "loss": 1.1454, "step": 3037 }, { "epoch": 2.299337748344371, "grad_norm": 1.9403904676437378, "learning_rate": 2.634917557625747e-06, "loss": 1.1051, "step": 3038 }, { "epoch": 2.3000946073793758, "grad_norm": 2.3704395294189453, "learning_rate": 2.629491216772951e-06, "loss": 1.0736, "step": 3039 }, { "epoch": 2.30085146641438, "grad_norm": 2.0632617473602295, "learning_rate": 2.6240696274822976e-06, "loss": 1.0948, "step": 3040 }, { "epoch": 2.301608325449385, "grad_norm": 1.9306870698928833, "learning_rate": 2.6186527932484595e-06, "loss": 1.0978, "step": 3041 }, { "epoch": 2.3023651844843895, "grad_norm": 2.1989099979400635, "learning_rate": 2.61324071756305e-06, "loss": 1.0513, "step": 3042 }, { "epoch": 2.3031220435193944, "grad_norm": 2.053589105606079, "learning_rate": 2.60783340391461e-06, "loss": 1.0828, "step": 3043 }, { "epoch": 2.3038789025543993, "grad_norm": 2.009385347366333, "learning_rate": 2.602430855788607e-06, "loss": 1.0859, "step": 3044 }, { "epoch": 2.304635761589404, "grad_norm": 2.086993932723999, "learning_rate": 2.597033076667443e-06, "loss": 1.1311, "step": 3045 }, { "epoch": 2.3053926206244086, "grad_norm": 2.1662371158599854, "learning_rate": 2.5916400700304476e-06, "loss": 1.0746, "step": 3046 }, { "epoch": 2.3061494796594135, "grad_norm": 1.9955482482910156, "learning_rate": 2.5862518393538662e-06, "loss": 1.0841, "step": 3047 }, { "epoch": 2.306906338694418, "grad_norm": 2.2083284854888916, "learning_rate": 2.5808683881108743e-06, "loss": 1.0738, "step": 3048 }, { "epoch": 2.307663197729423, "grad_norm": 2.1207940578460693, "learning_rate": 2.5754897197715566e-06, "loss": 1.1198, "step": 3049 }, { "epoch": 2.3084200567644277, "grad_norm": 2.125546455383301, "learning_rate": 2.5701158378029245e-06, "loss": 1.0487, "step": 3050 }, { "epoch": 2.309176915799432, "grad_norm": 2.025674343109131, "learning_rate": 2.564746745668899e-06, "loss": 1.0782, "step": 3051 }, { "epoch": 2.309933774834437, "grad_norm": 2.1108992099761963, "learning_rate": 2.559382446830318e-06, "loss": 1.1041, "step": 3052 }, { "epoch": 2.310690633869442, "grad_norm": 2.149214267730713, "learning_rate": 2.554022944744925e-06, "loss": 1.0332, "step": 3053 }, { "epoch": 2.3114474929044464, "grad_norm": 2.708857536315918, "learning_rate": 2.5486682428673753e-06, "loss": 1.0605, "step": 3054 }, { "epoch": 2.3122043519394513, "grad_norm": 2.029184341430664, "learning_rate": 2.5433183446492214e-06, "loss": 1.1014, "step": 3055 }, { "epoch": 2.312961210974456, "grad_norm": 1.9624137878417969, "learning_rate": 2.537973253538931e-06, "loss": 1.0924, "step": 3056 }, { "epoch": 2.3137180700094606, "grad_norm": 2.015695571899414, "learning_rate": 2.5326329729818673e-06, "loss": 1.0847, "step": 3057 }, { "epoch": 2.3144749290444655, "grad_norm": 2.0661072731018066, "learning_rate": 2.5272975064202943e-06, "loss": 1.0434, "step": 3058 }, { "epoch": 2.3152317880794704, "grad_norm": 2.1128456592559814, "learning_rate": 2.521966857293378e-06, "loss": 1.0661, "step": 3059 }, { "epoch": 2.315988647114475, "grad_norm": 2.1260507106781006, "learning_rate": 2.5166410290371626e-06, "loss": 1.084, "step": 3060 }, { "epoch": 2.3167455061494797, "grad_norm": 2.091785430908203, "learning_rate": 2.5113200250845996e-06, "loss": 1.0772, "step": 3061 }, { "epoch": 2.3175023651844846, "grad_norm": 2.1781039237976074, "learning_rate": 2.5060038488655302e-06, "loss": 1.0469, "step": 3062 }, { "epoch": 2.318259224219489, "grad_norm": 2.150576114654541, "learning_rate": 2.500692503806678e-06, "loss": 1.1243, "step": 3063 }, { "epoch": 2.319016083254494, "grad_norm": 2.021026372909546, "learning_rate": 2.4953859933316555e-06, "loss": 1.0894, "step": 3064 }, { "epoch": 2.3197729422894984, "grad_norm": 2.0633111000061035, "learning_rate": 2.490084320860961e-06, "loss": 1.07, "step": 3065 }, { "epoch": 2.3205298013245033, "grad_norm": 2.0326859951019287, "learning_rate": 2.4847874898119706e-06, "loss": 1.1148, "step": 3066 }, { "epoch": 2.321286660359508, "grad_norm": 2.0705957412719727, "learning_rate": 2.479495503598935e-06, "loss": 1.056, "step": 3067 }, { "epoch": 2.3220435193945126, "grad_norm": 2.0693790912628174, "learning_rate": 2.474208365632993e-06, "loss": 1.0678, "step": 3068 }, { "epoch": 2.3228003784295175, "grad_norm": 2.152256965637207, "learning_rate": 2.468926079322153e-06, "loss": 1.0826, "step": 3069 }, { "epoch": 2.3235572374645224, "grad_norm": 1.9246243238449097, "learning_rate": 2.463648648071298e-06, "loss": 1.0872, "step": 3070 }, { "epoch": 2.324314096499527, "grad_norm": 2.0952563285827637, "learning_rate": 2.4583760752821752e-06, "loss": 1.0851, "step": 3071 }, { "epoch": 2.3250709555345317, "grad_norm": 2.1515979766845703, "learning_rate": 2.453108364353406e-06, "loss": 1.0494, "step": 3072 }, { "epoch": 2.3258278145695366, "grad_norm": 2.0528197288513184, "learning_rate": 2.447845518680481e-06, "loss": 1.0619, "step": 3073 }, { "epoch": 2.326584673604541, "grad_norm": 2.0087759494781494, "learning_rate": 2.4425875416557426e-06, "loss": 1.0229, "step": 3074 }, { "epoch": 2.327341532639546, "grad_norm": 2.0219340324401855, "learning_rate": 2.437334436668407e-06, "loss": 1.0621, "step": 3075 }, { "epoch": 2.328098391674551, "grad_norm": 2.0388236045837402, "learning_rate": 2.432086207104549e-06, "loss": 1.111, "step": 3076 }, { "epoch": 2.3288552507095552, "grad_norm": 2.142197370529175, "learning_rate": 2.426842856347089e-06, "loss": 1.0878, "step": 3077 }, { "epoch": 2.32961210974456, "grad_norm": 1.8765891790390015, "learning_rate": 2.4216043877758163e-06, "loss": 1.1534, "step": 3078 }, { "epoch": 2.330368968779565, "grad_norm": 2.0405936241149902, "learning_rate": 2.416370804767367e-06, "loss": 1.0497, "step": 3079 }, { "epoch": 2.3311258278145695, "grad_norm": 1.989956259727478, "learning_rate": 2.4111421106952317e-06, "loss": 1.0716, "step": 3080 }, { "epoch": 2.3318826868495743, "grad_norm": 2.097310781478882, "learning_rate": 2.4059183089297432e-06, "loss": 1.0932, "step": 3081 }, { "epoch": 2.332639545884579, "grad_norm": 1.9613529443740845, "learning_rate": 2.4006994028380835e-06, "loss": 1.0853, "step": 3082 }, { "epoch": 2.3333964049195837, "grad_norm": 2.1626081466674805, "learning_rate": 2.3954853957842816e-06, "loss": 1.1113, "step": 3083 }, { "epoch": 2.3341532639545886, "grad_norm": 1.9847509860992432, "learning_rate": 2.3902762911292063e-06, "loss": 1.0632, "step": 3084 }, { "epoch": 2.334910122989593, "grad_norm": 2.432072639465332, "learning_rate": 2.385072092230568e-06, "loss": 1.0981, "step": 3085 }, { "epoch": 2.335666982024598, "grad_norm": 2.0696966648101807, "learning_rate": 2.3798728024429136e-06, "loss": 1.053, "step": 3086 }, { "epoch": 2.3364238410596028, "grad_norm": 2.017995595932007, "learning_rate": 2.374678425117631e-06, "loss": 1.1, "step": 3087 }, { "epoch": 2.337180700094607, "grad_norm": 2.056910753250122, "learning_rate": 2.369488963602927e-06, "loss": 1.1009, "step": 3088 }, { "epoch": 2.337937559129612, "grad_norm": 2.1294288635253906, "learning_rate": 2.3643044212438547e-06, "loss": 1.1008, "step": 3089 }, { "epoch": 2.338694418164617, "grad_norm": 1.9634032249450684, "learning_rate": 2.3591248013822885e-06, "loss": 1.0868, "step": 3090 }, { "epoch": 2.3394512771996214, "grad_norm": 2.2118031978607178, "learning_rate": 2.3539501073569357e-06, "loss": 1.1081, "step": 3091 }, { "epoch": 2.3402081362346263, "grad_norm": 2.3364391326904297, "learning_rate": 2.348780342503326e-06, "loss": 1.0373, "step": 3092 }, { "epoch": 2.340964995269631, "grad_norm": 2.061373472213745, "learning_rate": 2.343615510153806e-06, "loss": 1.0809, "step": 3093 }, { "epoch": 2.3417218543046356, "grad_norm": 1.9650219678878784, "learning_rate": 2.338455613637553e-06, "loss": 1.0659, "step": 3094 }, { "epoch": 2.3424787133396405, "grad_norm": 2.1445631980895996, "learning_rate": 2.333300656280552e-06, "loss": 1.0708, "step": 3095 }, { "epoch": 2.3432355723746454, "grad_norm": 2.209373950958252, "learning_rate": 2.328150641405614e-06, "loss": 1.0744, "step": 3096 }, { "epoch": 2.34399243140965, "grad_norm": 2.036855936050415, "learning_rate": 2.3230055723323587e-06, "loss": 1.0878, "step": 3097 }, { "epoch": 2.3447492904446547, "grad_norm": 2.2812464237213135, "learning_rate": 2.317865452377222e-06, "loss": 1.0321, "step": 3098 }, { "epoch": 2.345506149479659, "grad_norm": 1.9373234510421753, "learning_rate": 2.312730284853442e-06, "loss": 1.1768, "step": 3099 }, { "epoch": 2.346263008514664, "grad_norm": 1.9641289710998535, "learning_rate": 2.3076000730710715e-06, "loss": 1.0922, "step": 3100 }, { "epoch": 2.347019867549669, "grad_norm": 2.2891197204589844, "learning_rate": 2.3024748203369697e-06, "loss": 1.0122, "step": 3101 }, { "epoch": 2.347776726584674, "grad_norm": 2.042477607727051, "learning_rate": 2.2973545299547907e-06, "loss": 1.0209, "step": 3102 }, { "epoch": 2.3485335856196783, "grad_norm": 2.082688331604004, "learning_rate": 2.2922392052250012e-06, "loss": 1.0879, "step": 3103 }, { "epoch": 2.349290444654683, "grad_norm": 2.036217451095581, "learning_rate": 2.287128849444857e-06, "loss": 1.0736, "step": 3104 }, { "epoch": 2.3500473036896876, "grad_norm": 2.099870443344116, "learning_rate": 2.282023465908417e-06, "loss": 1.0502, "step": 3105 }, { "epoch": 2.3508041627246925, "grad_norm": 2.0198404788970947, "learning_rate": 2.276923057906534e-06, "loss": 1.1233, "step": 3106 }, { "epoch": 2.3515610217596974, "grad_norm": 2.331169843673706, "learning_rate": 2.271827628726853e-06, "loss": 1.0535, "step": 3107 }, { "epoch": 2.352317880794702, "grad_norm": 2.124520778656006, "learning_rate": 2.2667371816538124e-06, "loss": 1.0393, "step": 3108 }, { "epoch": 2.3530747398297067, "grad_norm": 1.9916775226593018, "learning_rate": 2.261651719968635e-06, "loss": 1.0811, "step": 3109 }, { "epoch": 2.3538315988647116, "grad_norm": 2.134824514389038, "learning_rate": 2.2565712469493285e-06, "loss": 1.0755, "step": 3110 }, { "epoch": 2.354588457899716, "grad_norm": 2.083906412124634, "learning_rate": 2.251495765870691e-06, "loss": 1.0626, "step": 3111 }, { "epoch": 2.355345316934721, "grad_norm": 2.06776762008667, "learning_rate": 2.246425280004301e-06, "loss": 1.0667, "step": 3112 }, { "epoch": 2.356102175969726, "grad_norm": 2.042806386947632, "learning_rate": 2.241359792618514e-06, "loss": 1.0589, "step": 3113 }, { "epoch": 2.3568590350047303, "grad_norm": 2.0756242275238037, "learning_rate": 2.2362993069784754e-06, "loss": 1.0757, "step": 3114 }, { "epoch": 2.357615894039735, "grad_norm": 2.315819501876831, "learning_rate": 2.231243826346082e-06, "loss": 1.1302, "step": 3115 }, { "epoch": 2.3583727530747396, "grad_norm": 2.038795232772827, "learning_rate": 2.2261933539800276e-06, "loss": 0.9891, "step": 3116 }, { "epoch": 2.3591296121097445, "grad_norm": 1.8599226474761963, "learning_rate": 2.2211478931357686e-06, "loss": 1.0786, "step": 3117 }, { "epoch": 2.3598864711447494, "grad_norm": 2.074420928955078, "learning_rate": 2.2161074470655327e-06, "loss": 1.0555, "step": 3118 }, { "epoch": 2.3606433301797543, "grad_norm": 2.0066890716552734, "learning_rate": 2.2110720190183143e-06, "loss": 1.077, "step": 3119 }, { "epoch": 2.3614001892147587, "grad_norm": 2.137488603591919, "learning_rate": 2.2060416122398754e-06, "loss": 1.0421, "step": 3120 }, { "epoch": 2.3621570482497636, "grad_norm": 2.0553901195526123, "learning_rate": 2.2010162299727382e-06, "loss": 1.0978, "step": 3121 }, { "epoch": 2.362913907284768, "grad_norm": 1.9921813011169434, "learning_rate": 2.1959958754561846e-06, "loss": 1.0452, "step": 3122 }, { "epoch": 2.363670766319773, "grad_norm": 2.0219523906707764, "learning_rate": 2.1909805519262607e-06, "loss": 1.0474, "step": 3123 }, { "epoch": 2.364427625354778, "grad_norm": 1.9772000312805176, "learning_rate": 2.185970262615767e-06, "loss": 1.0281, "step": 3124 }, { "epoch": 2.3651844843897822, "grad_norm": 2.09308123588562, "learning_rate": 2.1809650107542632e-06, "loss": 1.082, "step": 3125 }, { "epoch": 2.365941343424787, "grad_norm": 1.9955263137817383, "learning_rate": 2.175964799568052e-06, "loss": 1.0324, "step": 3126 }, { "epoch": 2.366698202459792, "grad_norm": 2.1914641857147217, "learning_rate": 2.1709696322801972e-06, "loss": 1.093, "step": 3127 }, { "epoch": 2.3674550614947965, "grad_norm": 1.9175313711166382, "learning_rate": 2.1659795121105097e-06, "loss": 1.0671, "step": 3128 }, { "epoch": 2.3682119205298013, "grad_norm": 2.153555154800415, "learning_rate": 2.16099444227554e-06, "loss": 1.0857, "step": 3129 }, { "epoch": 2.3689687795648062, "grad_norm": 2.1872153282165527, "learning_rate": 2.1560144259885886e-06, "loss": 1.0465, "step": 3130 }, { "epoch": 2.3697256385998107, "grad_norm": 2.0605931282043457, "learning_rate": 2.151039466459703e-06, "loss": 1.0579, "step": 3131 }, { "epoch": 2.3704824976348156, "grad_norm": 2.1121630668640137, "learning_rate": 2.1460695668956603e-06, "loss": 1.0797, "step": 3132 }, { "epoch": 2.37123935666982, "grad_norm": 1.927918553352356, "learning_rate": 2.1411047304999855e-06, "loss": 1.065, "step": 3133 }, { "epoch": 2.371996215704825, "grad_norm": 2.0213940143585205, "learning_rate": 2.1361449604729334e-06, "loss": 1.0243, "step": 3134 }, { "epoch": 2.3727530747398298, "grad_norm": 2.0634241104125977, "learning_rate": 2.1311902600115026e-06, "loss": 1.0243, "step": 3135 }, { "epoch": 2.3735099337748347, "grad_norm": 2.063898801803589, "learning_rate": 2.126240632309412e-06, "loss": 1.0804, "step": 3136 }, { "epoch": 2.374266792809839, "grad_norm": 1.9965025186538696, "learning_rate": 2.1212960805571153e-06, "loss": 1.0819, "step": 3137 }, { "epoch": 2.375023651844844, "grad_norm": 2.0790200233459473, "learning_rate": 2.1163566079417965e-06, "loss": 1.0549, "step": 3138 }, { "epoch": 2.3757805108798484, "grad_norm": 2.2082910537719727, "learning_rate": 2.1114222176473647e-06, "loss": 1.041, "step": 3139 }, { "epoch": 2.3765373699148533, "grad_norm": 2.1791975498199463, "learning_rate": 2.1064929128544527e-06, "loss": 1.0671, "step": 3140 }, { "epoch": 2.377294228949858, "grad_norm": 2.011662006378174, "learning_rate": 2.1015686967404155e-06, "loss": 1.0451, "step": 3141 }, { "epoch": 2.3780510879848626, "grad_norm": 2.092410087585449, "learning_rate": 2.0966495724793328e-06, "loss": 0.999, "step": 3142 }, { "epoch": 2.3788079470198675, "grad_norm": 2.0836849212646484, "learning_rate": 2.0917355432419856e-06, "loss": 1.0693, "step": 3143 }, { "epoch": 2.3795648060548724, "grad_norm": 2.1676831245422363, "learning_rate": 2.0868266121958895e-06, "loss": 1.0786, "step": 3144 }, { "epoch": 2.380321665089877, "grad_norm": 1.94955313205719, "learning_rate": 2.0819227825052655e-06, "loss": 1.04, "step": 3145 }, { "epoch": 2.3810785241248817, "grad_norm": 2.1713364124298096, "learning_rate": 2.0770240573310464e-06, "loss": 1.0997, "step": 3146 }, { "epoch": 2.3818353831598866, "grad_norm": 1.9667205810546875, "learning_rate": 2.07213043983088e-06, "loss": 1.0526, "step": 3147 }, { "epoch": 2.382592242194891, "grad_norm": 2.1327104568481445, "learning_rate": 2.067241933159111e-06, "loss": 1.0717, "step": 3148 }, { "epoch": 2.383349101229896, "grad_norm": 2.0339972972869873, "learning_rate": 2.0623585404668027e-06, "loss": 1.0583, "step": 3149 }, { "epoch": 2.384105960264901, "grad_norm": 2.172558069229126, "learning_rate": 2.0574802649017087e-06, "loss": 1.0814, "step": 3150 }, { "epoch": 2.3848628192999053, "grad_norm": 2.1538596153259277, "learning_rate": 2.0526071096082958e-06, "loss": 1.0713, "step": 3151 }, { "epoch": 2.38561967833491, "grad_norm": 2.009945869445801, "learning_rate": 2.0477390777277238e-06, "loss": 1.0783, "step": 3152 }, { "epoch": 2.386376537369915, "grad_norm": 2.024836540222168, "learning_rate": 2.042876172397855e-06, "loss": 1.0648, "step": 3153 }, { "epoch": 2.3871333964049195, "grad_norm": 1.9101126194000244, "learning_rate": 2.0380183967532398e-06, "loss": 1.0476, "step": 3154 }, { "epoch": 2.3878902554399244, "grad_norm": 1.9727280139923096, "learning_rate": 2.033165753925127e-06, "loss": 1.0658, "step": 3155 }, { "epoch": 2.388647114474929, "grad_norm": 2.0228683948516846, "learning_rate": 2.0283182470414605e-06, "loss": 1.0536, "step": 3156 }, { "epoch": 2.3894039735099337, "grad_norm": 2.0721933841705322, "learning_rate": 2.0234758792268626e-06, "loss": 1.1227, "step": 3157 }, { "epoch": 2.3901608325449386, "grad_norm": 2.15034818649292, "learning_rate": 2.0186386536026563e-06, "loss": 1.1274, "step": 3158 }, { "epoch": 2.390917691579943, "grad_norm": 2.1228785514831543, "learning_rate": 2.0138065732868377e-06, "loss": 1.1003, "step": 3159 }, { "epoch": 2.391674550614948, "grad_norm": 1.9539825916290283, "learning_rate": 2.008979641394094e-06, "loss": 1.0618, "step": 3160 }, { "epoch": 2.392431409649953, "grad_norm": 2.054403066635132, "learning_rate": 2.0041578610357924e-06, "loss": 1.0761, "step": 3161 }, { "epoch": 2.3931882686849573, "grad_norm": 2.2176952362060547, "learning_rate": 1.9993412353199797e-06, "loss": 1.0955, "step": 3162 }, { "epoch": 2.393945127719962, "grad_norm": 1.9848977327346802, "learning_rate": 1.9945297673513813e-06, "loss": 1.0478, "step": 3163 }, { "epoch": 2.394701986754967, "grad_norm": 2.0067944526672363, "learning_rate": 1.9897234602313935e-06, "loss": 1.0606, "step": 3164 }, { "epoch": 2.3954588457899715, "grad_norm": 2.1978671550750732, "learning_rate": 1.9849223170580863e-06, "loss": 1.0673, "step": 3165 }, { "epoch": 2.3962157048249764, "grad_norm": 2.103545665740967, "learning_rate": 1.9801263409262044e-06, "loss": 1.0697, "step": 3166 }, { "epoch": 2.3969725638599813, "grad_norm": 2.0121638774871826, "learning_rate": 1.975335534927164e-06, "loss": 1.1051, "step": 3167 }, { "epoch": 2.3977294228949857, "grad_norm": 2.227635383605957, "learning_rate": 1.970549902149043e-06, "loss": 1.1104, "step": 3168 }, { "epoch": 2.3984862819299906, "grad_norm": 1.98334538936615, "learning_rate": 1.965769445676593e-06, "loss": 1.0538, "step": 3169 }, { "epoch": 2.3992431409649955, "grad_norm": 2.092841863632202, "learning_rate": 1.9609941685912137e-06, "loss": 1.076, "step": 3170 }, { "epoch": 2.4, "grad_norm": 1.9714951515197754, "learning_rate": 1.9562240739709797e-06, "loss": 1.0408, "step": 3171 }, { "epoch": 2.400756859035005, "grad_norm": 2.228931427001953, "learning_rate": 1.951459164890623e-06, "loss": 1.07, "step": 3172 }, { "epoch": 2.4015137180700092, "grad_norm": 2.2674131393432617, "learning_rate": 1.9466994444215306e-06, "loss": 1.0301, "step": 3173 }, { "epoch": 2.402270577105014, "grad_norm": 2.017943859100342, "learning_rate": 1.941944915631745e-06, "loss": 1.0159, "step": 3174 }, { "epoch": 2.403027436140019, "grad_norm": 2.2940430641174316, "learning_rate": 1.937195581585966e-06, "loss": 1.0656, "step": 3175 }, { "epoch": 2.403784295175024, "grad_norm": 2.035090684890747, "learning_rate": 1.9324514453455404e-06, "loss": 1.0572, "step": 3176 }, { "epoch": 2.4045411542100283, "grad_norm": 2.0770013332366943, "learning_rate": 1.927712509968461e-06, "loss": 1.0763, "step": 3177 }, { "epoch": 2.4052980132450332, "grad_norm": 2.1525466442108154, "learning_rate": 1.9229787785093784e-06, "loss": 1.069, "step": 3178 }, { "epoch": 2.4060548722800377, "grad_norm": 1.9798002243041992, "learning_rate": 1.9182502540195826e-06, "loss": 1.04, "step": 3179 }, { "epoch": 2.4068117313150426, "grad_norm": 2.0167088508605957, "learning_rate": 1.9135269395470117e-06, "loss": 1.0951, "step": 3180 }, { "epoch": 2.4075685903500474, "grad_norm": 1.9377844333648682, "learning_rate": 1.908808838136235e-06, "loss": 1.0678, "step": 3181 }, { "epoch": 2.408325449385052, "grad_norm": 1.8350154161453247, "learning_rate": 1.904095952828474e-06, "loss": 1.0734, "step": 3182 }, { "epoch": 2.4090823084200568, "grad_norm": 2.102295160293579, "learning_rate": 1.8993882866615832e-06, "loss": 1.0446, "step": 3183 }, { "epoch": 2.4098391674550617, "grad_norm": 2.003739833831787, "learning_rate": 1.8946858426700479e-06, "loss": 1.1126, "step": 3184 }, { "epoch": 2.410596026490066, "grad_norm": 2.410670280456543, "learning_rate": 1.8899886238849949e-06, "loss": 1.0803, "step": 3185 }, { "epoch": 2.411352885525071, "grad_norm": 1.9924821853637695, "learning_rate": 1.8852966333341822e-06, "loss": 1.0713, "step": 3186 }, { "epoch": 2.412109744560076, "grad_norm": 2.1071012020111084, "learning_rate": 1.880609874041989e-06, "loss": 1.0772, "step": 3187 }, { "epoch": 2.4128666035950803, "grad_norm": 2.1172494888305664, "learning_rate": 1.8759283490294333e-06, "loss": 1.0584, "step": 3188 }, { "epoch": 2.413623462630085, "grad_norm": 2.2310361862182617, "learning_rate": 1.8712520613141525e-06, "loss": 1.039, "step": 3189 }, { "epoch": 2.4143803216650896, "grad_norm": 1.90047287940979, "learning_rate": 1.8665810139104157e-06, "loss": 1.0776, "step": 3190 }, { "epoch": 2.4151371807000945, "grad_norm": 2.110424041748047, "learning_rate": 1.8619152098291044e-06, "loss": 1.0546, "step": 3191 }, { "epoch": 2.4158940397350994, "grad_norm": 1.9257051944732666, "learning_rate": 1.8572546520777214e-06, "loss": 1.0843, "step": 3192 }, { "epoch": 2.4166508987701043, "grad_norm": 2.1789603233337402, "learning_rate": 1.852599343660396e-06, "loss": 1.0479, "step": 3193 }, { "epoch": 2.4174077578051087, "grad_norm": 1.8343688249588013, "learning_rate": 1.847949287577868e-06, "loss": 0.9984, "step": 3194 }, { "epoch": 2.4181646168401136, "grad_norm": 2.1044609546661377, "learning_rate": 1.843304486827492e-06, "loss": 1.0603, "step": 3195 }, { "epoch": 2.418921475875118, "grad_norm": 2.0383095741271973, "learning_rate": 1.838664944403236e-06, "loss": 1.0733, "step": 3196 }, { "epoch": 2.419678334910123, "grad_norm": 1.9661799669265747, "learning_rate": 1.8340306632956847e-06, "loss": 1.0257, "step": 3197 }, { "epoch": 2.420435193945128, "grad_norm": 2.1200876235961914, "learning_rate": 1.8294016464920133e-06, "loss": 1.0597, "step": 3198 }, { "epoch": 2.4211920529801323, "grad_norm": 2.138803005218506, "learning_rate": 1.8247778969760206e-06, "loss": 1.0518, "step": 3199 }, { "epoch": 2.421948912015137, "grad_norm": 1.937528371810913, "learning_rate": 1.8201594177281053e-06, "loss": 1.1165, "step": 3200 }, { "epoch": 2.422705771050142, "grad_norm": 1.99111008644104, "learning_rate": 1.8155462117252693e-06, "loss": 1.0687, "step": 3201 }, { "epoch": 2.4234626300851465, "grad_norm": 2.2547271251678467, "learning_rate": 1.8109382819411164e-06, "loss": 1.0613, "step": 3202 }, { "epoch": 2.4242194891201514, "grad_norm": 1.8853436708450317, "learning_rate": 1.8063356313458443e-06, "loss": 1.1003, "step": 3203 }, { "epoch": 2.4249763481551563, "grad_norm": 1.9870060682296753, "learning_rate": 1.801738262906254e-06, "loss": 1.0924, "step": 3204 }, { "epoch": 2.4257332071901607, "grad_norm": 1.9391242265701294, "learning_rate": 1.7971461795857367e-06, "loss": 1.0116, "step": 3205 }, { "epoch": 2.4264900662251656, "grad_norm": 2.092609167098999, "learning_rate": 1.7925593843442798e-06, "loss": 1.1001, "step": 3206 }, { "epoch": 2.42724692526017, "grad_norm": 2.029475212097168, "learning_rate": 1.787977880138463e-06, "loss": 1.0716, "step": 3207 }, { "epoch": 2.428003784295175, "grad_norm": 2.054161787033081, "learning_rate": 1.783401669921456e-06, "loss": 1.0752, "step": 3208 }, { "epoch": 2.42876064333018, "grad_norm": 1.762110710144043, "learning_rate": 1.7788307566430083e-06, "loss": 1.0639, "step": 3209 }, { "epoch": 2.4295175023651847, "grad_norm": 2.044447898864746, "learning_rate": 1.774265143249467e-06, "loss": 1.0905, "step": 3210 }, { "epoch": 2.430274361400189, "grad_norm": 2.024933338165283, "learning_rate": 1.7697048326837516e-06, "loss": 1.0152, "step": 3211 }, { "epoch": 2.431031220435194, "grad_norm": 1.9569860696792603, "learning_rate": 1.7651498278853708e-06, "loss": 1.0603, "step": 3212 }, { "epoch": 2.4317880794701985, "grad_norm": 2.0603296756744385, "learning_rate": 1.760600131790414e-06, "loss": 1.1086, "step": 3213 }, { "epoch": 2.4325449385052034, "grad_norm": 1.8924018144607544, "learning_rate": 1.7560557473315413e-06, "loss": 1.0509, "step": 3214 }, { "epoch": 2.4333017975402083, "grad_norm": 1.9490152597427368, "learning_rate": 1.7515166774379947e-06, "loss": 1.0518, "step": 3215 }, { "epoch": 2.4340586565752127, "grad_norm": 2.046515703201294, "learning_rate": 1.746982925035591e-06, "loss": 1.0466, "step": 3216 }, { "epoch": 2.4348155156102176, "grad_norm": 2.0436580181121826, "learning_rate": 1.7424544930467205e-06, "loss": 1.0642, "step": 3217 }, { "epoch": 2.4355723746452225, "grad_norm": 2.1087872982025146, "learning_rate": 1.7379313843903357e-06, "loss": 1.0838, "step": 3218 }, { "epoch": 2.436329233680227, "grad_norm": 1.896474838256836, "learning_rate": 1.7334136019819681e-06, "loss": 1.0678, "step": 3219 }, { "epoch": 2.437086092715232, "grad_norm": 2.087778091430664, "learning_rate": 1.7289011487337059e-06, "loss": 1.0402, "step": 3220 }, { "epoch": 2.4378429517502367, "grad_norm": 1.9922432899475098, "learning_rate": 1.7243940275542126e-06, "loss": 1.119, "step": 3221 }, { "epoch": 2.438599810785241, "grad_norm": 2.1278886795043945, "learning_rate": 1.7198922413487073e-06, "loss": 1.1303, "step": 3222 }, { "epoch": 2.439356669820246, "grad_norm": 2.059356689453125, "learning_rate": 1.7153957930189735e-06, "loss": 1.0732, "step": 3223 }, { "epoch": 2.440113528855251, "grad_norm": 2.127638578414917, "learning_rate": 1.7109046854633587e-06, "loss": 1.0715, "step": 3224 }, { "epoch": 2.4408703878902553, "grad_norm": 2.0281498432159424, "learning_rate": 1.7064189215767526e-06, "loss": 1.0378, "step": 3225 }, { "epoch": 2.4416272469252602, "grad_norm": 2.8992788791656494, "learning_rate": 1.7019385042506134e-06, "loss": 1.0982, "step": 3226 }, { "epoch": 2.442384105960265, "grad_norm": 2.046388626098633, "learning_rate": 1.697463436372951e-06, "loss": 1.0689, "step": 3227 }, { "epoch": 2.4431409649952696, "grad_norm": 1.9682432413101196, "learning_rate": 1.692993720828327e-06, "loss": 1.0886, "step": 3228 }, { "epoch": 2.4438978240302744, "grad_norm": 2.190717935562134, "learning_rate": 1.6885293604978495e-06, "loss": 1.0941, "step": 3229 }, { "epoch": 2.444654683065279, "grad_norm": 2.263735294342041, "learning_rate": 1.6840703582591808e-06, "loss": 1.1485, "step": 3230 }, { "epoch": 2.4454115421002838, "grad_norm": 1.917043924331665, "learning_rate": 1.6796167169865243e-06, "loss": 1.044, "step": 3231 }, { "epoch": 2.4461684011352887, "grad_norm": 2.120823621749878, "learning_rate": 1.6751684395506248e-06, "loss": 1.0492, "step": 3232 }, { "epoch": 2.446925260170293, "grad_norm": 1.9636114835739136, "learning_rate": 1.6707255288187776e-06, "loss": 1.0525, "step": 3233 }, { "epoch": 2.447682119205298, "grad_norm": 2.068773031234741, "learning_rate": 1.6662879876548164e-06, "loss": 1.1185, "step": 3234 }, { "epoch": 2.448438978240303, "grad_norm": 1.9608315229415894, "learning_rate": 1.661855818919112e-06, "loss": 1.0705, "step": 3235 }, { "epoch": 2.4491958372753073, "grad_norm": 2.037750005722046, "learning_rate": 1.65742902546857e-06, "loss": 1.1109, "step": 3236 }, { "epoch": 2.449952696310312, "grad_norm": 2.2666871547698975, "learning_rate": 1.653007610156637e-06, "loss": 1.0955, "step": 3237 }, { "epoch": 2.450709555345317, "grad_norm": 2.0493760108947754, "learning_rate": 1.6485915758332899e-06, "loss": 1.0354, "step": 3238 }, { "epoch": 2.4514664143803215, "grad_norm": 2.0443224906921387, "learning_rate": 1.6441809253450347e-06, "loss": 1.0605, "step": 3239 }, { "epoch": 2.4522232734153264, "grad_norm": 1.7710448503494263, "learning_rate": 1.6397756615349103e-06, "loss": 1.064, "step": 3240 }, { "epoch": 2.4529801324503313, "grad_norm": 2.0574166774749756, "learning_rate": 1.6353757872424848e-06, "loss": 1.0986, "step": 3241 }, { "epoch": 2.4537369914853357, "grad_norm": 2.0350000858306885, "learning_rate": 1.6309813053038476e-06, "loss": 1.0722, "step": 3242 }, { "epoch": 2.4544938505203406, "grad_norm": 2.178621530532837, "learning_rate": 1.6265922185516136e-06, "loss": 1.0777, "step": 3243 }, { "epoch": 2.4552507095553455, "grad_norm": 1.874701976776123, "learning_rate": 1.6222085298149237e-06, "loss": 1.0678, "step": 3244 }, { "epoch": 2.45600756859035, "grad_norm": 2.080073356628418, "learning_rate": 1.617830241919439e-06, "loss": 1.1109, "step": 3245 }, { "epoch": 2.456764427625355, "grad_norm": 2.067389488220215, "learning_rate": 1.6134573576873347e-06, "loss": 1.0613, "step": 3246 }, { "epoch": 2.4575212866603593, "grad_norm": 2.2184066772460938, "learning_rate": 1.6090898799373013e-06, "loss": 1.0445, "step": 3247 }, { "epoch": 2.458278145695364, "grad_norm": 2.1395821571350098, "learning_rate": 1.6047278114845524e-06, "loss": 1.1291, "step": 3248 }, { "epoch": 2.459035004730369, "grad_norm": 1.878059983253479, "learning_rate": 1.6003711551408108e-06, "loss": 1.1197, "step": 3249 }, { "epoch": 2.4597918637653735, "grad_norm": 2.079202651977539, "learning_rate": 1.5960199137143096e-06, "loss": 1.0601, "step": 3250 }, { "epoch": 2.4605487228003784, "grad_norm": 2.1114516258239746, "learning_rate": 1.5916740900097936e-06, "loss": 1.0981, "step": 3251 }, { "epoch": 2.4613055818353833, "grad_norm": 2.0840392112731934, "learning_rate": 1.5873336868285188e-06, "loss": 1.0901, "step": 3252 }, { "epoch": 2.4620624408703877, "grad_norm": 2.1868133544921875, "learning_rate": 1.582998706968233e-06, "loss": 1.1103, "step": 3253 }, { "epoch": 2.4628192999053926, "grad_norm": 2.0153892040252686, "learning_rate": 1.5786691532232047e-06, "loss": 1.0825, "step": 3254 }, { "epoch": 2.4635761589403975, "grad_norm": 2.147407054901123, "learning_rate": 1.5743450283841957e-06, "loss": 1.0529, "step": 3255 }, { "epoch": 2.464333017975402, "grad_norm": 2.2476887702941895, "learning_rate": 1.5700263352384732e-06, "loss": 1.0551, "step": 3256 }, { "epoch": 2.465089877010407, "grad_norm": 1.897383689880371, "learning_rate": 1.5657130765698006e-06, "loss": 1.0773, "step": 3257 }, { "epoch": 2.4658467360454117, "grad_norm": 2.2343618869781494, "learning_rate": 1.56140525515844e-06, "loss": 1.0388, "step": 3258 }, { "epoch": 2.466603595080416, "grad_norm": 2.182474136352539, "learning_rate": 1.5571028737811414e-06, "loss": 1.0837, "step": 3259 }, { "epoch": 2.467360454115421, "grad_norm": 1.94349205493927, "learning_rate": 1.5528059352111586e-06, "loss": 1.0374, "step": 3260 }, { "epoch": 2.468117313150426, "grad_norm": 2.3165524005889893, "learning_rate": 1.5485144422182325e-06, "loss": 1.049, "step": 3261 }, { "epoch": 2.4688741721854304, "grad_norm": 2.2094292640686035, "learning_rate": 1.5442283975685937e-06, "loss": 1.0934, "step": 3262 }, { "epoch": 2.4696310312204353, "grad_norm": 2.0244195461273193, "learning_rate": 1.5399478040249638e-06, "loss": 1.044, "step": 3263 }, { "epoch": 2.4703878902554397, "grad_norm": 1.9300179481506348, "learning_rate": 1.5356726643465427e-06, "loss": 1.1156, "step": 3264 }, { "epoch": 2.4711447492904446, "grad_norm": 2.06846022605896, "learning_rate": 1.5314029812890258e-06, "loss": 1.012, "step": 3265 }, { "epoch": 2.4719016083254495, "grad_norm": 2.2604005336761475, "learning_rate": 1.5271387576045804e-06, "loss": 1.1042, "step": 3266 }, { "epoch": 2.4726584673604544, "grad_norm": 2.3489127159118652, "learning_rate": 1.5228799960418639e-06, "loss": 1.05, "step": 3267 }, { "epoch": 2.473415326395459, "grad_norm": 2.0200610160827637, "learning_rate": 1.518626699346009e-06, "loss": 1.1298, "step": 3268 }, { "epoch": 2.4741721854304637, "grad_norm": 2.0748353004455566, "learning_rate": 1.514378870258623e-06, "loss": 1.0477, "step": 3269 }, { "epoch": 2.474929044465468, "grad_norm": 2.0303914546966553, "learning_rate": 1.510136511517792e-06, "loss": 1.0319, "step": 3270 }, { "epoch": 2.475685903500473, "grad_norm": 1.9617363214492798, "learning_rate": 1.5058996258580788e-06, "loss": 1.1149, "step": 3271 }, { "epoch": 2.476442762535478, "grad_norm": 2.0544240474700928, "learning_rate": 1.5016682160105153e-06, "loss": 1.0733, "step": 3272 }, { "epoch": 2.4771996215704823, "grad_norm": 2.3402349948883057, "learning_rate": 1.4974422847026002e-06, "loss": 1.0615, "step": 3273 }, { "epoch": 2.4779564806054872, "grad_norm": 2.0029454231262207, "learning_rate": 1.4932218346583082e-06, "loss": 1.0688, "step": 3274 }, { "epoch": 2.478713339640492, "grad_norm": 1.8487077951431274, "learning_rate": 1.4890068685980732e-06, "loss": 1.1071, "step": 3275 }, { "epoch": 2.4794701986754966, "grad_norm": 2.282620906829834, "learning_rate": 1.4847973892388003e-06, "loss": 1.0802, "step": 3276 }, { "epoch": 2.4802270577105014, "grad_norm": 1.9295916557312012, "learning_rate": 1.4805933992938547e-06, "loss": 1.0663, "step": 3277 }, { "epoch": 2.4809839167455063, "grad_norm": 2.0331246852874756, "learning_rate": 1.476394901473066e-06, "loss": 1.0906, "step": 3278 }, { "epoch": 2.4817407757805108, "grad_norm": 2.1244752407073975, "learning_rate": 1.4722018984827247e-06, "loss": 1.0655, "step": 3279 }, { "epoch": 2.4824976348155157, "grad_norm": 2.140397787094116, "learning_rate": 1.4680143930255675e-06, "loss": 1.0969, "step": 3280 }, { "epoch": 2.48325449385052, "grad_norm": 2.1430792808532715, "learning_rate": 1.4638323878008022e-06, "loss": 1.06, "step": 3281 }, { "epoch": 2.484011352885525, "grad_norm": 2.28765606880188, "learning_rate": 1.459655885504086e-06, "loss": 1.1159, "step": 3282 }, { "epoch": 2.48476821192053, "grad_norm": 1.9069238901138306, "learning_rate": 1.455484888827526e-06, "loss": 1.0083, "step": 3283 }, { "epoch": 2.4855250709555348, "grad_norm": 1.848893642425537, "learning_rate": 1.4513194004596865e-06, "loss": 1.0527, "step": 3284 }, { "epoch": 2.486281929990539, "grad_norm": 1.8594064712524414, "learning_rate": 1.4471594230855774e-06, "loss": 1.0815, "step": 3285 }, { "epoch": 2.487038789025544, "grad_norm": 1.9376791715621948, "learning_rate": 1.4430049593866543e-06, "loss": 1.0403, "step": 3286 }, { "epoch": 2.4877956480605485, "grad_norm": 2.031545639038086, "learning_rate": 1.4388560120408215e-06, "loss": 1.0378, "step": 3287 }, { "epoch": 2.4885525070955534, "grad_norm": 2.0290255546569824, "learning_rate": 1.4347125837224266e-06, "loss": 1.05, "step": 3288 }, { "epoch": 2.4893093661305583, "grad_norm": 2.070533037185669, "learning_rate": 1.4305746771022623e-06, "loss": 1.0854, "step": 3289 }, { "epoch": 2.4900662251655628, "grad_norm": 3.2161612510681152, "learning_rate": 1.4264422948475618e-06, "loss": 1.0484, "step": 3290 }, { "epoch": 2.4908230842005676, "grad_norm": 2.044058322906494, "learning_rate": 1.4223154396219906e-06, "loss": 1.0543, "step": 3291 }, { "epoch": 2.4915799432355725, "grad_norm": 1.9972931146621704, "learning_rate": 1.4181941140856595e-06, "loss": 1.0482, "step": 3292 }, { "epoch": 2.492336802270577, "grad_norm": 2.115438222885132, "learning_rate": 1.4140783208951142e-06, "loss": 1.1006, "step": 3293 }, { "epoch": 2.493093661305582, "grad_norm": 1.9351952075958252, "learning_rate": 1.4099680627033266e-06, "loss": 1.0229, "step": 3294 }, { "epoch": 2.4938505203405867, "grad_norm": 2.0593087673187256, "learning_rate": 1.4058633421597104e-06, "loss": 1.0653, "step": 3295 }, { "epoch": 2.494607379375591, "grad_norm": 2.1452414989471436, "learning_rate": 1.4017641619101074e-06, "loss": 1.1039, "step": 3296 }, { "epoch": 2.495364238410596, "grad_norm": 1.799978494644165, "learning_rate": 1.3976705245967832e-06, "loss": 1.0778, "step": 3297 }, { "epoch": 2.4961210974456005, "grad_norm": 1.9537346363067627, "learning_rate": 1.3935824328584335e-06, "loss": 1.0688, "step": 3298 }, { "epoch": 2.4968779564806054, "grad_norm": 2.1485798358917236, "learning_rate": 1.3894998893301829e-06, "loss": 1.0416, "step": 3299 }, { "epoch": 2.4976348155156103, "grad_norm": 2.167556047439575, "learning_rate": 1.3854228966435768e-06, "loss": 1.0693, "step": 3300 }, { "epoch": 2.498391674550615, "grad_norm": 2.1335930824279785, "learning_rate": 1.3813514574265815e-06, "loss": 1.0789, "step": 3301 }, { "epoch": 2.4991485335856196, "grad_norm": 1.9826858043670654, "learning_rate": 1.3772855743035818e-06, "loss": 1.071, "step": 3302 }, { "epoch": 2.4999053926206245, "grad_norm": 1.9714877605438232, "learning_rate": 1.3732252498953874e-06, "loss": 1.0915, "step": 3303 }, { "epoch": 2.500662251655629, "grad_norm": 1.859167218208313, "learning_rate": 1.3691704868192202e-06, "loss": 1.0345, "step": 3304 }, { "epoch": 2.501419110690634, "grad_norm": 1.9658515453338623, "learning_rate": 1.3651212876887181e-06, "loss": 1.0506, "step": 3305 }, { "epoch": 2.5021759697256387, "grad_norm": 2.1254079341888428, "learning_rate": 1.361077655113935e-06, "loss": 1.0721, "step": 3306 }, { "epoch": 2.5029328287606436, "grad_norm": 2.0176279544830322, "learning_rate": 1.3570395917013365e-06, "loss": 1.0711, "step": 3307 }, { "epoch": 2.503689687795648, "grad_norm": 2.091989278793335, "learning_rate": 1.353007100053791e-06, "loss": 1.1103, "step": 3308 }, { "epoch": 2.504446546830653, "grad_norm": 2.0587007999420166, "learning_rate": 1.348980182770584e-06, "loss": 1.0569, "step": 3309 }, { "epoch": 2.5052034058656574, "grad_norm": 1.9162755012512207, "learning_rate": 1.344958842447405e-06, "loss": 1.0914, "step": 3310 }, { "epoch": 2.5059602649006623, "grad_norm": 1.995250940322876, "learning_rate": 1.3409430816763478e-06, "loss": 1.0789, "step": 3311 }, { "epoch": 2.506717123935667, "grad_norm": 2.189922571182251, "learning_rate": 1.3369329030459152e-06, "loss": 1.1112, "step": 3312 }, { "epoch": 2.5074739829706716, "grad_norm": 2.006410837173462, "learning_rate": 1.3329283091410014e-06, "loss": 1.0911, "step": 3313 }, { "epoch": 2.5082308420056765, "grad_norm": 1.9690699577331543, "learning_rate": 1.3289293025429082e-06, "loss": 1.0642, "step": 3314 }, { "epoch": 2.508987701040681, "grad_norm": 1.9968689680099487, "learning_rate": 1.324935885829334e-06, "loss": 1.1037, "step": 3315 }, { "epoch": 2.509744560075686, "grad_norm": 2.0873682498931885, "learning_rate": 1.3209480615743746e-06, "loss": 1.0804, "step": 3316 }, { "epoch": 2.5105014191106907, "grad_norm": 2.100817918777466, "learning_rate": 1.3169658323485212e-06, "loss": 1.0313, "step": 3317 }, { "epoch": 2.5112582781456956, "grad_norm": 2.1082022190093994, "learning_rate": 1.3129892007186602e-06, "loss": 1.0815, "step": 3318 }, { "epoch": 2.5120151371807, "grad_norm": 2.102774143218994, "learning_rate": 1.3090181692480642e-06, "loss": 1.0529, "step": 3319 }, { "epoch": 2.512771996215705, "grad_norm": 1.8931456804275513, "learning_rate": 1.305052740496402e-06, "loss": 1.029, "step": 3320 }, { "epoch": 2.5135288552507093, "grad_norm": 1.9600942134857178, "learning_rate": 1.301092917019724e-06, "loss": 1.0499, "step": 3321 }, { "epoch": 2.5142857142857142, "grad_norm": 1.9435330629348755, "learning_rate": 1.2971387013704767e-06, "loss": 1.0662, "step": 3322 }, { "epoch": 2.515042573320719, "grad_norm": 2.6212613582611084, "learning_rate": 1.2931900960974872e-06, "loss": 1.0569, "step": 3323 }, { "epoch": 2.515799432355724, "grad_norm": 2.1485402584075928, "learning_rate": 1.2892471037459634e-06, "loss": 1.0798, "step": 3324 }, { "epoch": 2.5165562913907285, "grad_norm": 1.9852415323257446, "learning_rate": 1.285309726857499e-06, "loss": 1.0856, "step": 3325 }, { "epoch": 2.5173131504257333, "grad_norm": 2.0448715686798096, "learning_rate": 1.281377967970067e-06, "loss": 1.0894, "step": 3326 }, { "epoch": 2.518070009460738, "grad_norm": 1.966200590133667, "learning_rate": 1.2774518296180222e-06, "loss": 1.0583, "step": 3327 }, { "epoch": 2.5188268684957427, "grad_norm": 2.0975255966186523, "learning_rate": 1.2735313143320901e-06, "loss": 1.1087, "step": 3328 }, { "epoch": 2.5195837275307476, "grad_norm": 1.8325495719909668, "learning_rate": 1.2696164246393766e-06, "loss": 1.0857, "step": 3329 }, { "epoch": 2.520340586565752, "grad_norm": 2.3163001537323, "learning_rate": 1.265707163063358e-06, "loss": 1.0535, "step": 3330 }, { "epoch": 2.521097445600757, "grad_norm": 2.0799732208251953, "learning_rate": 1.2618035321238856e-06, "loss": 1.1036, "step": 3331 }, { "epoch": 2.5218543046357613, "grad_norm": 1.9881048202514648, "learning_rate": 1.257905534337181e-06, "loss": 1.0693, "step": 3332 }, { "epoch": 2.522611163670766, "grad_norm": 2.5260820388793945, "learning_rate": 1.2540131722158336e-06, "loss": 1.0339, "step": 3333 }, { "epoch": 2.523368022705771, "grad_norm": 2.0032739639282227, "learning_rate": 1.2501264482688052e-06, "loss": 1.0888, "step": 3334 }, { "epoch": 2.524124881740776, "grad_norm": 1.9609510898590088, "learning_rate": 1.2462453650014107e-06, "loss": 1.0654, "step": 3335 }, { "epoch": 2.5248817407757804, "grad_norm": 2.018681049346924, "learning_rate": 1.2423699249153408e-06, "loss": 1.0763, "step": 3336 }, { "epoch": 2.5256385998107853, "grad_norm": 1.9069342613220215, "learning_rate": 1.2385001305086455e-06, "loss": 1.0883, "step": 3337 }, { "epoch": 2.5263954588457898, "grad_norm": 2.1848833560943604, "learning_rate": 1.2346359842757345e-06, "loss": 1.0894, "step": 3338 }, { "epoch": 2.5271523178807946, "grad_norm": 2.210022449493408, "learning_rate": 1.230777488707379e-06, "loss": 1.0807, "step": 3339 }, { "epoch": 2.5279091769157995, "grad_norm": 2.1791112422943115, "learning_rate": 1.2269246462907065e-06, "loss": 1.0707, "step": 3340 }, { "epoch": 2.5286660359508044, "grad_norm": 1.9715594053268433, "learning_rate": 1.2230774595092005e-06, "loss": 1.0304, "step": 3341 }, { "epoch": 2.529422894985809, "grad_norm": 2.082552433013916, "learning_rate": 1.219235930842696e-06, "loss": 1.088, "step": 3342 }, { "epoch": 2.5301797540208137, "grad_norm": 2.0022144317626953, "learning_rate": 1.215400062767385e-06, "loss": 1.0153, "step": 3343 }, { "epoch": 2.530936613055818, "grad_norm": 2.0545942783355713, "learning_rate": 1.2115698577558096e-06, "loss": 1.0895, "step": 3344 }, { "epoch": 2.531693472090823, "grad_norm": 1.8354177474975586, "learning_rate": 1.207745318276865e-06, "loss": 1.082, "step": 3345 }, { "epoch": 2.532450331125828, "grad_norm": 2.023404121398926, "learning_rate": 1.203926446795787e-06, "loss": 1.0675, "step": 3346 }, { "epoch": 2.5332071901608324, "grad_norm": 1.8171032667160034, "learning_rate": 1.2001132457741615e-06, "loss": 1.0464, "step": 3347 }, { "epoch": 2.5339640491958373, "grad_norm": 1.9232815504074097, "learning_rate": 1.1963057176699249e-06, "loss": 1.0647, "step": 3348 }, { "epoch": 2.5347209082308417, "grad_norm": 1.9270484447479248, "learning_rate": 1.1925038649373456e-06, "loss": 1.1044, "step": 3349 }, { "epoch": 2.5354777672658466, "grad_norm": 2.1430654525756836, "learning_rate": 1.1887076900270418e-06, "loss": 1.0809, "step": 3350 }, { "epoch": 2.5362346263008515, "grad_norm": 2.0014898777008057, "learning_rate": 1.1849171953859737e-06, "loss": 1.0902, "step": 3351 }, { "epoch": 2.5369914853358564, "grad_norm": 2.0257580280303955, "learning_rate": 1.1811323834574302e-06, "loss": 1.0911, "step": 3352 }, { "epoch": 2.537748344370861, "grad_norm": 1.9925347566604614, "learning_rate": 1.1773532566810477e-06, "loss": 1.0666, "step": 3353 }, { "epoch": 2.5385052034058657, "grad_norm": 2.072805404663086, "learning_rate": 1.1735798174927917e-06, "loss": 1.1, "step": 3354 }, { "epoch": 2.53926206244087, "grad_norm": 2.0256335735321045, "learning_rate": 1.1698120683249663e-06, "loss": 1.0585, "step": 3355 }, { "epoch": 2.540018921475875, "grad_norm": 2.217489004135132, "learning_rate": 1.1660500116062037e-06, "loss": 1.0761, "step": 3356 }, { "epoch": 2.54077578051088, "grad_norm": 1.91505765914917, "learning_rate": 1.1622936497614644e-06, "loss": 1.0579, "step": 3357 }, { "epoch": 2.541532639545885, "grad_norm": 1.9349195957183838, "learning_rate": 1.1585429852120462e-06, "loss": 1.074, "step": 3358 }, { "epoch": 2.5422894985808893, "grad_norm": 1.915028691291809, "learning_rate": 1.1547980203755697e-06, "loss": 1.0717, "step": 3359 }, { "epoch": 2.543046357615894, "grad_norm": 2.154155731201172, "learning_rate": 1.1510587576659814e-06, "loss": 1.057, "step": 3360 }, { "epoch": 2.5438032166508986, "grad_norm": 2.6311988830566406, "learning_rate": 1.1473251994935532e-06, "loss": 1.1184, "step": 3361 }, { "epoch": 2.5445600756859035, "grad_norm": 2.038876533508301, "learning_rate": 1.1435973482648844e-06, "loss": 1.0693, "step": 3362 }, { "epoch": 2.5453169347209084, "grad_norm": 2.0545339584350586, "learning_rate": 1.1398752063828815e-06, "loss": 1.0887, "step": 3363 }, { "epoch": 2.546073793755913, "grad_norm": 1.8213523626327515, "learning_rate": 1.1361587762467873e-06, "loss": 1.0866, "step": 3364 }, { "epoch": 2.5468306527909177, "grad_norm": 2.055341958999634, "learning_rate": 1.1324480602521524e-06, "loss": 1.0923, "step": 3365 }, { "epoch": 2.5475875118259226, "grad_norm": 2.2624013423919678, "learning_rate": 1.1287430607908508e-06, "loss": 1.0532, "step": 3366 }, { "epoch": 2.548344370860927, "grad_norm": 2.7478582859039307, "learning_rate": 1.1250437802510686e-06, "loss": 1.0849, "step": 3367 }, { "epoch": 2.549101229895932, "grad_norm": 1.8595587015151978, "learning_rate": 1.1213502210173044e-06, "loss": 1.0518, "step": 3368 }, { "epoch": 2.549858088930937, "grad_norm": 2.0965359210968018, "learning_rate": 1.1176623854703688e-06, "loss": 1.0152, "step": 3369 }, { "epoch": 2.5506149479659412, "grad_norm": 2.1459243297576904, "learning_rate": 1.1139802759873852e-06, "loss": 1.1309, "step": 3370 }, { "epoch": 2.551371807000946, "grad_norm": 1.981099247932434, "learning_rate": 1.110303894941786e-06, "loss": 1.1094, "step": 3371 }, { "epoch": 2.5521286660359506, "grad_norm": 2.0165510177612305, "learning_rate": 1.10663324470331e-06, "loss": 1.0812, "step": 3372 }, { "epoch": 2.5528855250709555, "grad_norm": 1.839483380317688, "learning_rate": 1.102968327638005e-06, "loss": 1.0098, "step": 3373 }, { "epoch": 2.5536423841059603, "grad_norm": 2.0099551677703857, "learning_rate": 1.0993091461082154e-06, "loss": 1.0111, "step": 3374 }, { "epoch": 2.5543992431409652, "grad_norm": 2.1071789264678955, "learning_rate": 1.0956557024725986e-06, "loss": 1.018, "step": 3375 }, { "epoch": 2.5551561021759697, "grad_norm": 2.2769486904144287, "learning_rate": 1.0920079990861043e-06, "loss": 1.0538, "step": 3376 }, { "epoch": 2.5559129612109746, "grad_norm": 1.969045877456665, "learning_rate": 1.088366038299989e-06, "loss": 1.0186, "step": 3377 }, { "epoch": 2.556669820245979, "grad_norm": 2.0804572105407715, "learning_rate": 1.0847298224618053e-06, "loss": 1.0581, "step": 3378 }, { "epoch": 2.557426679280984, "grad_norm": 1.9232372045516968, "learning_rate": 1.081099353915403e-06, "loss": 1.0507, "step": 3379 }, { "epoch": 2.5581835383159888, "grad_norm": 1.9603554010391235, "learning_rate": 1.077474635000925e-06, "loss": 1.0878, "step": 3380 }, { "epoch": 2.558940397350993, "grad_norm": 2.1331593990325928, "learning_rate": 1.07385566805481e-06, "loss": 1.0751, "step": 3381 }, { "epoch": 2.559697256385998, "grad_norm": 2.48708438873291, "learning_rate": 1.070242455409791e-06, "loss": 1.0929, "step": 3382 }, { "epoch": 2.560454115421003, "grad_norm": 2.023110866546631, "learning_rate": 1.066634999394886e-06, "loss": 1.0477, "step": 3383 }, { "epoch": 2.5612109744560074, "grad_norm": 2.201087236404419, "learning_rate": 1.0630333023354118e-06, "loss": 1.0736, "step": 3384 }, { "epoch": 2.5619678334910123, "grad_norm": 2.0009500980377197, "learning_rate": 1.0594373665529613e-06, "loss": 1.0059, "step": 3385 }, { "epoch": 2.562724692526017, "grad_norm": 1.9756640195846558, "learning_rate": 1.0558471943654217e-06, "loss": 1.0857, "step": 3386 }, { "epoch": 2.5634815515610216, "grad_norm": 2.2636256217956543, "learning_rate": 1.0522627880869646e-06, "loss": 1.0848, "step": 3387 }, { "epoch": 2.5642384105960265, "grad_norm": 2.0621442794799805, "learning_rate": 1.0486841500280441e-06, "loss": 1.0821, "step": 3388 }, { "epoch": 2.564995269631031, "grad_norm": 2.0804009437561035, "learning_rate": 1.0451112824953961e-06, "loss": 1.072, "step": 3389 }, { "epoch": 2.565752128666036, "grad_norm": 2.1010797023773193, "learning_rate": 1.0415441877920349e-06, "loss": 1.0384, "step": 3390 }, { "epoch": 2.5665089877010407, "grad_norm": 2.0294950008392334, "learning_rate": 1.037982868217254e-06, "loss": 1.1007, "step": 3391 }, { "epoch": 2.5672658467360456, "grad_norm": 2.2194080352783203, "learning_rate": 1.0344273260666264e-06, "loss": 1.0293, "step": 3392 }, { "epoch": 2.56802270577105, "grad_norm": 2.0473580360412598, "learning_rate": 1.0308775636320018e-06, "loss": 1.0989, "step": 3393 }, { "epoch": 2.568779564806055, "grad_norm": 2.1180901527404785, "learning_rate": 1.027333583201503e-06, "loss": 1.073, "step": 3394 }, { "epoch": 2.5695364238410594, "grad_norm": 1.9611269235610962, "learning_rate": 1.0237953870595262e-06, "loss": 1.0638, "step": 3395 }, { "epoch": 2.5702932828760643, "grad_norm": 2.031759023666382, "learning_rate": 1.0202629774867378e-06, "loss": 1.044, "step": 3396 }, { "epoch": 2.571050141911069, "grad_norm": 2.155648708343506, "learning_rate": 1.016736356760073e-06, "loss": 1.0815, "step": 3397 }, { "epoch": 2.571807000946074, "grad_norm": 2.0659499168395996, "learning_rate": 1.0132155271527401e-06, "loss": 1.0977, "step": 3398 }, { "epoch": 2.5725638599810785, "grad_norm": 2.2170495986938477, "learning_rate": 1.0097004909342112e-06, "loss": 1.0449, "step": 3399 }, { "epoch": 2.5733207190160834, "grad_norm": 1.9732736349105835, "learning_rate": 1.0061912503702258e-06, "loss": 1.0475, "step": 3400 }, { "epoch": 2.574077578051088, "grad_norm": 1.9781739711761475, "learning_rate": 1.0026878077227885e-06, "loss": 1.0778, "step": 3401 }, { "epoch": 2.5748344370860927, "grad_norm": 2.0298542976379395, "learning_rate": 9.99190165250161e-07, "loss": 1.0379, "step": 3402 }, { "epoch": 2.5755912961210976, "grad_norm": 1.7894214391708374, "learning_rate": 9.95698325206874e-07, "loss": 1.069, "step": 3403 }, { "epoch": 2.576348155156102, "grad_norm": 2.022477865219116, "learning_rate": 9.922122898437122e-07, "loss": 1.0623, "step": 3404 }, { "epoch": 2.577105014191107, "grad_norm": 1.8968234062194824, "learning_rate": 9.887320614077198e-07, "loss": 1.04, "step": 3405 }, { "epoch": 2.5778618732261114, "grad_norm": 2.217832326889038, "learning_rate": 9.852576421422033e-07, "loss": 1.0943, "step": 3406 }, { "epoch": 2.5786187322611163, "grad_norm": 2.0771234035491943, "learning_rate": 9.817890342867157e-07, "loss": 1.0767, "step": 3407 }, { "epoch": 2.579375591296121, "grad_norm": 1.9438964128494263, "learning_rate": 9.783262400770708e-07, "loss": 1.1243, "step": 3408 }, { "epoch": 2.580132450331126, "grad_norm": 2.441040277481079, "learning_rate": 9.748692617453326e-07, "loss": 1.0624, "step": 3409 }, { "epoch": 2.5808893093661305, "grad_norm": 2.0702598094940186, "learning_rate": 9.714181015198182e-07, "loss": 1.0696, "step": 3410 }, { "epoch": 2.5816461684011354, "grad_norm": 2.0918853282928467, "learning_rate": 9.67972761625091e-07, "loss": 1.0814, "step": 3411 }, { "epoch": 2.58240302743614, "grad_norm": 1.9000964164733887, "learning_rate": 9.645332442819653e-07, "loss": 1.0554, "step": 3412 }, { "epoch": 2.5831598864711447, "grad_norm": 2.1118955612182617, "learning_rate": 9.610995517075005e-07, "loss": 1.0622, "step": 3413 }, { "epoch": 2.5839167455061496, "grad_norm": 2.136005163192749, "learning_rate": 9.57671686115003e-07, "loss": 1.0871, "step": 3414 }, { "epoch": 2.5846736045411545, "grad_norm": 2.0861973762512207, "learning_rate": 9.542496497140228e-07, "loss": 1.0348, "step": 3415 }, { "epoch": 2.585430463576159, "grad_norm": 1.9754106998443604, "learning_rate": 9.50833444710354e-07, "loss": 1.0797, "step": 3416 }, { "epoch": 2.586187322611164, "grad_norm": 2.138561964035034, "learning_rate": 9.474230733060293e-07, "loss": 1.1018, "step": 3417 }, { "epoch": 2.5869441816461682, "grad_norm": 2.1578221321105957, "learning_rate": 9.440185376993193e-07, "loss": 1.1082, "step": 3418 }, { "epoch": 2.587701040681173, "grad_norm": 1.928044080734253, "learning_rate": 9.406198400847376e-07, "loss": 1.0723, "step": 3419 }, { "epoch": 2.588457899716178, "grad_norm": 2.0299084186553955, "learning_rate": 9.372269826530338e-07, "loss": 1.0557, "step": 3420 }, { "epoch": 2.5892147587511825, "grad_norm": 2.100691556930542, "learning_rate": 9.338399675911917e-07, "loss": 1.0221, "step": 3421 }, { "epoch": 2.5899716177861873, "grad_norm": 2.015913724899292, "learning_rate": 9.304587970824288e-07, "loss": 1.0651, "step": 3422 }, { "epoch": 2.590728476821192, "grad_norm": 1.8734519481658936, "learning_rate": 9.270834733061999e-07, "loss": 1.0554, "step": 3423 }, { "epoch": 2.5914853358561967, "grad_norm": 1.9088720083236694, "learning_rate": 9.237139984381806e-07, "loss": 1.0847, "step": 3424 }, { "epoch": 2.5922421948912016, "grad_norm": 2.083169460296631, "learning_rate": 9.203503746502859e-07, "loss": 1.0714, "step": 3425 }, { "epoch": 2.5929990539262064, "grad_norm": 1.9658424854278564, "learning_rate": 9.169926041106579e-07, "loss": 1.0231, "step": 3426 }, { "epoch": 2.593755912961211, "grad_norm": 1.955154299736023, "learning_rate": 9.13640688983662e-07, "loss": 1.0539, "step": 3427 }, { "epoch": 2.5945127719962158, "grad_norm": 2.0878820419311523, "learning_rate": 9.102946314298959e-07, "loss": 1.0911, "step": 3428 }, { "epoch": 2.59526963103122, "grad_norm": 1.9909857511520386, "learning_rate": 9.069544336061716e-07, "loss": 1.083, "step": 3429 }, { "epoch": 2.596026490066225, "grad_norm": 2.0630910396575928, "learning_rate": 9.036200976655337e-07, "loss": 1.0977, "step": 3430 }, { "epoch": 2.59678334910123, "grad_norm": 1.982391595840454, "learning_rate": 9.002916257572411e-07, "loss": 1.0271, "step": 3431 }, { "epoch": 2.597540208136235, "grad_norm": 1.9987069368362427, "learning_rate": 8.969690200267786e-07, "loss": 1.0813, "step": 3432 }, { "epoch": 2.5982970671712393, "grad_norm": 1.983818531036377, "learning_rate": 8.936522826158452e-07, "loss": 1.0776, "step": 3433 }, { "epoch": 2.599053926206244, "grad_norm": 1.9349209070205688, "learning_rate": 8.903414156623622e-07, "loss": 1.0509, "step": 3434 }, { "epoch": 2.5998107852412486, "grad_norm": 2.01790714263916, "learning_rate": 8.870364213004612e-07, "loss": 1.0949, "step": 3435 }, { "epoch": 2.6005676442762535, "grad_norm": 2.0256693363189697, "learning_rate": 8.837373016604916e-07, "loss": 1.0572, "step": 3436 }, { "epoch": 2.6013245033112584, "grad_norm": 1.989288568496704, "learning_rate": 8.804440588690183e-07, "loss": 1.0321, "step": 3437 }, { "epoch": 2.602081362346263, "grad_norm": 2.1254732608795166, "learning_rate": 8.771566950488107e-07, "loss": 1.0513, "step": 3438 }, { "epoch": 2.6028382213812677, "grad_norm": 2.4187963008880615, "learning_rate": 8.738752123188587e-07, "loss": 1.0755, "step": 3439 }, { "epoch": 2.6035950804162726, "grad_norm": 2.0207037925720215, "learning_rate": 8.705996127943503e-07, "loss": 1.0669, "step": 3440 }, { "epoch": 2.604351939451277, "grad_norm": 2.1482834815979004, "learning_rate": 8.6732989858669e-07, "loss": 1.0675, "step": 3441 }, { "epoch": 2.605108798486282, "grad_norm": 1.9809141159057617, "learning_rate": 8.640660718034855e-07, "loss": 1.1257, "step": 3442 }, { "epoch": 2.605865657521287, "grad_norm": 2.1395535469055176, "learning_rate": 8.608081345485507e-07, "loss": 1.1311, "step": 3443 }, { "epoch": 2.6066225165562913, "grad_norm": 2.1757044792175293, "learning_rate": 8.575560889219027e-07, "loss": 1.0888, "step": 3444 }, { "epoch": 2.607379375591296, "grad_norm": 1.8578970432281494, "learning_rate": 8.543099370197591e-07, "loss": 1.0527, "step": 3445 }, { "epoch": 2.6081362346263006, "grad_norm": 1.966065526008606, "learning_rate": 8.51069680934539e-07, "loss": 1.0301, "step": 3446 }, { "epoch": 2.6088930936613055, "grad_norm": 1.9428819417953491, "learning_rate": 8.478353227548625e-07, "loss": 1.0255, "step": 3447 }, { "epoch": 2.6096499526963104, "grad_norm": 1.873252272605896, "learning_rate": 8.446068645655477e-07, "loss": 1.092, "step": 3448 }, { "epoch": 2.6104068117313153, "grad_norm": 2.0765583515167236, "learning_rate": 8.413843084476109e-07, "loss": 1.0985, "step": 3449 }, { "epoch": 2.6111636707663197, "grad_norm": 2.0844457149505615, "learning_rate": 8.381676564782655e-07, "loss": 1.1138, "step": 3450 }, { "epoch": 2.6119205298013246, "grad_norm": 1.9434021711349487, "learning_rate": 8.349569107309078e-07, "loss": 1.059, "step": 3451 }, { "epoch": 2.612677388836329, "grad_norm": 2.0002236366271973, "learning_rate": 8.317520732751409e-07, "loss": 1.0701, "step": 3452 }, { "epoch": 2.613434247871334, "grad_norm": 2.0566999912261963, "learning_rate": 8.285531461767541e-07, "loss": 1.066, "step": 3453 }, { "epoch": 2.614191106906339, "grad_norm": 1.926048994064331, "learning_rate": 8.253601314977264e-07, "loss": 1.0811, "step": 3454 }, { "epoch": 2.6149479659413433, "grad_norm": 2.086893320083618, "learning_rate": 8.22173031296225e-07, "loss": 1.0647, "step": 3455 }, { "epoch": 2.615704824976348, "grad_norm": 2.1144938468933105, "learning_rate": 8.189918476266104e-07, "loss": 1.0737, "step": 3456 }, { "epoch": 2.616461684011353, "grad_norm": 1.8913697004318237, "learning_rate": 8.158165825394222e-07, "loss": 1.0621, "step": 3457 }, { "epoch": 2.6172185430463575, "grad_norm": 1.850129246711731, "learning_rate": 8.126472380813851e-07, "loss": 1.0475, "step": 3458 }, { "epoch": 2.6179754020813624, "grad_norm": 1.9463554620742798, "learning_rate": 8.094838162954142e-07, "loss": 1.0652, "step": 3459 }, { "epoch": 2.6187322611163673, "grad_norm": 1.9536323547363281, "learning_rate": 8.063263192206013e-07, "loss": 1.0567, "step": 3460 }, { "epoch": 2.6194891201513717, "grad_norm": 1.9918063879013062, "learning_rate": 8.031747488922231e-07, "loss": 1.0604, "step": 3461 }, { "epoch": 2.6202459791863766, "grad_norm": 2.074878215789795, "learning_rate": 8.0002910734173e-07, "loss": 1.0467, "step": 3462 }, { "epoch": 2.621002838221381, "grad_norm": 2.0185697078704834, "learning_rate": 7.968893965967558e-07, "loss": 1.1014, "step": 3463 }, { "epoch": 2.621759697256386, "grad_norm": 1.9658541679382324, "learning_rate": 7.937556186811127e-07, "loss": 1.0582, "step": 3464 }, { "epoch": 2.622516556291391, "grad_norm": 2.0424704551696777, "learning_rate": 7.906277756147835e-07, "loss": 1.0843, "step": 3465 }, { "epoch": 2.6232734153263957, "grad_norm": 2.1158058643341064, "learning_rate": 7.875058694139282e-07, "loss": 1.0359, "step": 3466 }, { "epoch": 2.6240302743614, "grad_norm": 2.1120176315307617, "learning_rate": 7.843899020908823e-07, "loss": 1.0796, "step": 3467 }, { "epoch": 2.624787133396405, "grad_norm": 1.910689353942871, "learning_rate": 7.812798756541483e-07, "loss": 1.0547, "step": 3468 }, { "epoch": 2.6255439924314095, "grad_norm": 2.1395435333251953, "learning_rate": 7.781757921084019e-07, "loss": 1.0968, "step": 3469 }, { "epoch": 2.6263008514664143, "grad_norm": 1.9301244020462036, "learning_rate": 7.750776534544889e-07, "loss": 1.0543, "step": 3470 }, { "epoch": 2.6270577105014192, "grad_norm": 1.9667290449142456, "learning_rate": 7.719854616894243e-07, "loss": 1.0575, "step": 3471 }, { "epoch": 2.627814569536424, "grad_norm": 1.8904736042022705, "learning_rate": 7.688992188063853e-07, "loss": 1.0781, "step": 3472 }, { "epoch": 2.6285714285714286, "grad_norm": 1.991716980934143, "learning_rate": 7.658189267947159e-07, "loss": 1.052, "step": 3473 }, { "epoch": 2.6293282876064334, "grad_norm": 2.0166666507720947, "learning_rate": 7.627445876399259e-07, "loss": 1.0668, "step": 3474 }, { "epoch": 2.630085146641438, "grad_norm": 2.020129919052124, "learning_rate": 7.596762033236895e-07, "loss": 1.0539, "step": 3475 }, { "epoch": 2.6308420056764428, "grad_norm": 2.1295621395111084, "learning_rate": 7.566137758238386e-07, "loss": 1.0264, "step": 3476 }, { "epoch": 2.6315988647114477, "grad_norm": 2.1414082050323486, "learning_rate": 7.53557307114367e-07, "loss": 1.0476, "step": 3477 }, { "epoch": 2.632355723746452, "grad_norm": 1.8784303665161133, "learning_rate": 7.505067991654335e-07, "loss": 1.0252, "step": 3478 }, { "epoch": 2.633112582781457, "grad_norm": 2.0736515522003174, "learning_rate": 7.474622539433398e-07, "loss": 1.0661, "step": 3479 }, { "epoch": 2.6338694418164614, "grad_norm": 1.9620708227157593, "learning_rate": 7.444236734105581e-07, "loss": 1.0835, "step": 3480 }, { "epoch": 2.6346263008514663, "grad_norm": 2.1406285762786865, "learning_rate": 7.413910595257105e-07, "loss": 1.0577, "step": 3481 }, { "epoch": 2.635383159886471, "grad_norm": 1.9883054494857788, "learning_rate": 7.383644142435741e-07, "loss": 1.0546, "step": 3482 }, { "epoch": 2.636140018921476, "grad_norm": 1.9472057819366455, "learning_rate": 7.353437395150799e-07, "loss": 1.0452, "step": 3483 }, { "epoch": 2.6368968779564805, "grad_norm": 2.0464439392089844, "learning_rate": 7.323290372873055e-07, "loss": 1.1029, "step": 3484 }, { "epoch": 2.6376537369914854, "grad_norm": 1.9990071058273315, "learning_rate": 7.293203095034839e-07, "loss": 1.1126, "step": 3485 }, { "epoch": 2.63841059602649, "grad_norm": 2.022820234298706, "learning_rate": 7.263175581029933e-07, "loss": 1.0625, "step": 3486 }, { "epoch": 2.6391674550614947, "grad_norm": 1.835789442062378, "learning_rate": 7.233207850213639e-07, "loss": 1.0732, "step": 3487 }, { "epoch": 2.6399243140964996, "grad_norm": 1.9170242547988892, "learning_rate": 7.20329992190268e-07, "loss": 1.1162, "step": 3488 }, { "epoch": 2.6406811731315045, "grad_norm": 1.9878673553466797, "learning_rate": 7.173451815375276e-07, "loss": 1.0664, "step": 3489 }, { "epoch": 2.641438032166509, "grad_norm": 2.3943591117858887, "learning_rate": 7.14366354987102e-07, "loss": 1.1248, "step": 3490 }, { "epoch": 2.642194891201514, "grad_norm": 1.926537275314331, "learning_rate": 7.113935144591011e-07, "loss": 1.0654, "step": 3491 }, { "epoch": 2.6429517502365183, "grad_norm": 2.132347583770752, "learning_rate": 7.084266618697722e-07, "loss": 1.058, "step": 3492 }, { "epoch": 2.643708609271523, "grad_norm": 2.0414459705352783, "learning_rate": 7.054657991315009e-07, "loss": 1.0699, "step": 3493 }, { "epoch": 2.644465468306528, "grad_norm": 2.3885207176208496, "learning_rate": 7.025109281528162e-07, "loss": 1.0914, "step": 3494 }, { "epoch": 2.6452223273415325, "grad_norm": 2.1995675563812256, "learning_rate": 6.995620508383816e-07, "loss": 1.0691, "step": 3495 }, { "epoch": 2.6459791863765374, "grad_norm": 1.9995527267456055, "learning_rate": 6.966191690889987e-07, "loss": 1.0715, "step": 3496 }, { "epoch": 2.646736045411542, "grad_norm": 2.06939435005188, "learning_rate": 6.936822848016048e-07, "loss": 1.0522, "step": 3497 }, { "epoch": 2.6474929044465467, "grad_norm": 2.001063346862793, "learning_rate": 6.907513998692701e-07, "loss": 1.0726, "step": 3498 }, { "epoch": 2.6482497634815516, "grad_norm": 2.1571309566497803, "learning_rate": 6.878265161812005e-07, "loss": 1.0609, "step": 3499 }, { "epoch": 2.6490066225165565, "grad_norm": 2.0034592151641846, "learning_rate": 6.849076356227285e-07, "loss": 1.0636, "step": 3500 }, { "epoch": 2.649763481551561, "grad_norm": 1.8944875001907349, "learning_rate": 6.819947600753214e-07, "loss": 1.061, "step": 3501 }, { "epoch": 2.650520340586566, "grad_norm": 1.9522629976272583, "learning_rate": 6.790878914165723e-07, "loss": 1.1064, "step": 3502 }, { "epoch": 2.6512771996215703, "grad_norm": 1.9700335264205933, "learning_rate": 6.761870315202072e-07, "loss": 1.0509, "step": 3503 }, { "epoch": 2.652034058656575, "grad_norm": 1.9173399209976196, "learning_rate": 6.732921822560753e-07, "loss": 1.0467, "step": 3504 }, { "epoch": 2.65279091769158, "grad_norm": 2.1325631141662598, "learning_rate": 6.704033454901552e-07, "loss": 1.0547, "step": 3505 }, { "epoch": 2.653547776726585, "grad_norm": 1.9540364742279053, "learning_rate": 6.67520523084541e-07, "loss": 1.1084, "step": 3506 }, { "epoch": 2.6543046357615894, "grad_norm": 2.026878595352173, "learning_rate": 6.646437168974577e-07, "loss": 1.0511, "step": 3507 }, { "epoch": 2.6550614947965943, "grad_norm": 2.238311290740967, "learning_rate": 6.617729287832535e-07, "loss": 1.1053, "step": 3508 }, { "epoch": 2.6558183538315987, "grad_norm": 2.0573630332946777, "learning_rate": 6.589081605923916e-07, "loss": 1.0377, "step": 3509 }, { "epoch": 2.6565752128666036, "grad_norm": 2.0835254192352295, "learning_rate": 6.56049414171461e-07, "loss": 1.0403, "step": 3510 }, { "epoch": 2.6573320719016085, "grad_norm": 1.888987421989441, "learning_rate": 6.531966913631649e-07, "loss": 1.0874, "step": 3511 }, { "epoch": 2.658088930936613, "grad_norm": 1.9704266786575317, "learning_rate": 6.503499940063245e-07, "loss": 1.0968, "step": 3512 }, { "epoch": 2.658845789971618, "grad_norm": 2.062167167663574, "learning_rate": 6.475093239358764e-07, "loss": 1.074, "step": 3513 }, { "epoch": 2.6596026490066222, "grad_norm": 2.041229724884033, "learning_rate": 6.446746829828747e-07, "loss": 1.0881, "step": 3514 }, { "epoch": 2.660359508041627, "grad_norm": 2.0432045459747314, "learning_rate": 6.41846072974484e-07, "loss": 1.0497, "step": 3515 }, { "epoch": 2.661116367076632, "grad_norm": 2.037137746810913, "learning_rate": 6.390234957339877e-07, "loss": 1.1183, "step": 3516 }, { "epoch": 2.661873226111637, "grad_norm": 2.0373597145080566, "learning_rate": 6.362069530807692e-07, "loss": 1.0721, "step": 3517 }, { "epoch": 2.6626300851466413, "grad_norm": 2.2422561645507812, "learning_rate": 6.333964468303339e-07, "loss": 1.1072, "step": 3518 }, { "epoch": 2.6633869441816462, "grad_norm": 2.035428285598755, "learning_rate": 6.305919787942921e-07, "loss": 1.0755, "step": 3519 }, { "epoch": 2.6641438032166507, "grad_norm": 2.0684683322906494, "learning_rate": 6.277935507803559e-07, "loss": 1.0673, "step": 3520 }, { "epoch": 2.6649006622516556, "grad_norm": 2.1325268745422363, "learning_rate": 6.25001164592354e-07, "loss": 1.0269, "step": 3521 }, { "epoch": 2.6656575212866604, "grad_norm": 1.9109140634536743, "learning_rate": 6.222148220302141e-07, "loss": 1.0445, "step": 3522 }, { "epoch": 2.6664143803216653, "grad_norm": 2.0545241832733154, "learning_rate": 6.1943452488997e-07, "loss": 1.03, "step": 3523 }, { "epoch": 2.6671712393566698, "grad_norm": 1.9249234199523926, "learning_rate": 6.166602749637587e-07, "loss": 1.0247, "step": 3524 }, { "epoch": 2.6679280983916747, "grad_norm": 2.097187042236328, "learning_rate": 6.138920740398207e-07, "loss": 1.0982, "step": 3525 }, { "epoch": 2.668684957426679, "grad_norm": 2.3547725677490234, "learning_rate": 6.111299239024957e-07, "loss": 1.0561, "step": 3526 }, { "epoch": 2.669441816461684, "grad_norm": 1.9423364400863647, "learning_rate": 6.083738263322244e-07, "loss": 1.0832, "step": 3527 }, { "epoch": 2.670198675496689, "grad_norm": 1.9852162599563599, "learning_rate": 6.056237831055416e-07, "loss": 1.0746, "step": 3528 }, { "epoch": 2.6709555345316933, "grad_norm": 2.095628261566162, "learning_rate": 6.02879795995085e-07, "loss": 1.0651, "step": 3529 }, { "epoch": 2.671712393566698, "grad_norm": 2.109067916870117, "learning_rate": 6.001418667695884e-07, "loss": 1.0847, "step": 3530 }, { "epoch": 2.672469252601703, "grad_norm": 1.9949407577514648, "learning_rate": 5.97409997193879e-07, "loss": 1.1257, "step": 3531 }, { "epoch": 2.6732261116367075, "grad_norm": 2.216343641281128, "learning_rate": 5.946841890288763e-07, "loss": 1.0922, "step": 3532 }, { "epoch": 2.6739829706717124, "grad_norm": 1.9918749332427979, "learning_rate": 5.91964444031599e-07, "loss": 1.0832, "step": 3533 }, { "epoch": 2.6747398297067173, "grad_norm": 2.0035977363586426, "learning_rate": 5.892507639551483e-07, "loss": 1.1023, "step": 3534 }, { "epoch": 2.6754966887417218, "grad_norm": 1.9594407081604004, "learning_rate": 5.86543150548722e-07, "loss": 1.0479, "step": 3535 }, { "epoch": 2.6762535477767266, "grad_norm": 1.9996135234832764, "learning_rate": 5.838416055576072e-07, "loss": 1.0837, "step": 3536 }, { "epoch": 2.677010406811731, "grad_norm": 2.032686710357666, "learning_rate": 5.811461307231798e-07, "loss": 1.0673, "step": 3537 }, { "epoch": 2.677767265846736, "grad_norm": 1.7957963943481445, "learning_rate": 5.784567277829007e-07, "loss": 1.0672, "step": 3538 }, { "epoch": 2.678524124881741, "grad_norm": 1.936874508857727, "learning_rate": 5.757733984703174e-07, "loss": 1.1329, "step": 3539 }, { "epoch": 2.6792809839167457, "grad_norm": 2.072567939758301, "learning_rate": 5.730961445150644e-07, "loss": 1.1066, "step": 3540 }, { "epoch": 2.68003784295175, "grad_norm": 1.8656892776489258, "learning_rate": 5.704249676428575e-07, "loss": 1.1158, "step": 3541 }, { "epoch": 2.680794701986755, "grad_norm": 1.9235533475875854, "learning_rate": 5.677598695754967e-07, "loss": 1.0364, "step": 3542 }, { "epoch": 2.6815515610217595, "grad_norm": 2.159919261932373, "learning_rate": 5.651008520308641e-07, "loss": 1.0813, "step": 3543 }, { "epoch": 2.6823084200567644, "grad_norm": 2.093416929244995, "learning_rate": 5.624479167229225e-07, "loss": 1.082, "step": 3544 }, { "epoch": 2.6830652790917693, "grad_norm": 1.9927400350570679, "learning_rate": 5.598010653617116e-07, "loss": 1.0486, "step": 3545 }, { "epoch": 2.6838221381267737, "grad_norm": 1.986259937286377, "learning_rate": 5.571602996533528e-07, "loss": 1.0532, "step": 3546 }, { "epoch": 2.6845789971617786, "grad_norm": 2.1325762271881104, "learning_rate": 5.54525621300045e-07, "loss": 1.1321, "step": 3547 }, { "epoch": 2.6853358561967835, "grad_norm": 1.9752742052078247, "learning_rate": 5.518970320000578e-07, "loss": 1.0752, "step": 3548 }, { "epoch": 2.686092715231788, "grad_norm": 1.9965808391571045, "learning_rate": 5.492745334477438e-07, "loss": 1.0721, "step": 3549 }, { "epoch": 2.686849574266793, "grad_norm": 2.021066427230835, "learning_rate": 5.466581273335216e-07, "loss": 1.0819, "step": 3550 }, { "epoch": 2.6876064333017977, "grad_norm": 2.0489556789398193, "learning_rate": 5.440478153438891e-07, "loss": 1.0542, "step": 3551 }, { "epoch": 2.688363292336802, "grad_norm": 2.1207127571105957, "learning_rate": 5.414435991614129e-07, "loss": 1.0577, "step": 3552 }, { "epoch": 2.689120151371807, "grad_norm": 2.099400281906128, "learning_rate": 5.388454804647312e-07, "loss": 1.0527, "step": 3553 }, { "epoch": 2.6898770104068115, "grad_norm": 2.165239095687866, "learning_rate": 5.362534609285534e-07, "loss": 1.0728, "step": 3554 }, { "epoch": 2.6906338694418164, "grad_norm": 1.964612364768982, "learning_rate": 5.336675422236547e-07, "loss": 1.1085, "step": 3555 }, { "epoch": 2.6913907284768213, "grad_norm": 2.3169875144958496, "learning_rate": 5.31087726016876e-07, "loss": 1.0873, "step": 3556 }, { "epoch": 2.692147587511826, "grad_norm": 2.005558490753174, "learning_rate": 5.285140139711306e-07, "loss": 1.0435, "step": 3557 }, { "epoch": 2.6929044465468306, "grad_norm": 1.9185731410980225, "learning_rate": 5.259464077453933e-07, "loss": 1.1144, "step": 3558 }, { "epoch": 2.6936613055818355, "grad_norm": 1.933445930480957, "learning_rate": 5.233849089947034e-07, "loss": 1.0526, "step": 3559 }, { "epoch": 2.69441816461684, "grad_norm": 2.1504805088043213, "learning_rate": 5.208295193701673e-07, "loss": 1.0822, "step": 3560 }, { "epoch": 2.695175023651845, "grad_norm": 2.1270816326141357, "learning_rate": 5.182802405189443e-07, "loss": 1.0848, "step": 3561 }, { "epoch": 2.6959318826868497, "grad_norm": 2.112243890762329, "learning_rate": 5.157370740842649e-07, "loss": 1.0501, "step": 3562 }, { "epoch": 2.6966887417218546, "grad_norm": 2.2307591438293457, "learning_rate": 5.132000217054134e-07, "loss": 1.1388, "step": 3563 }, { "epoch": 2.697445600756859, "grad_norm": 2.053459405899048, "learning_rate": 5.106690850177358e-07, "loss": 1.0846, "step": 3564 }, { "epoch": 2.698202459791864, "grad_norm": 2.0699667930603027, "learning_rate": 5.08144265652635e-07, "loss": 1.0567, "step": 3565 }, { "epoch": 2.6989593188268683, "grad_norm": 2.0828826427459717, "learning_rate": 5.056255652375729e-07, "loss": 1.0729, "step": 3566 }, { "epoch": 2.6997161778618732, "grad_norm": 1.9452773332595825, "learning_rate": 5.031129853960639e-07, "loss": 1.0788, "step": 3567 }, { "epoch": 2.700473036896878, "grad_norm": 2.0344362258911133, "learning_rate": 5.006065277476771e-07, "loss": 1.054, "step": 3568 }, { "epoch": 2.7012298959318826, "grad_norm": 1.900039792060852, "learning_rate": 4.981061939080384e-07, "loss": 1.0262, "step": 3569 }, { "epoch": 2.7019867549668874, "grad_norm": 1.9985625743865967, "learning_rate": 4.956119854888261e-07, "loss": 1.0899, "step": 3570 }, { "epoch": 2.702743614001892, "grad_norm": 2.088229179382324, "learning_rate": 4.931239040977678e-07, "loss": 1.1423, "step": 3571 }, { "epoch": 2.703500473036897, "grad_norm": 1.9471749067306519, "learning_rate": 4.90641951338641e-07, "loss": 1.0762, "step": 3572 }, { "epoch": 2.7042573320719017, "grad_norm": 1.8323947191238403, "learning_rate": 4.88166128811277e-07, "loss": 1.0531, "step": 3573 }, { "epoch": 2.7050141911069066, "grad_norm": 1.9754245281219482, "learning_rate": 4.856964381115542e-07, "loss": 1.1185, "step": 3574 }, { "epoch": 2.705771050141911, "grad_norm": 1.9460619688034058, "learning_rate": 4.83232880831394e-07, "loss": 1.1218, "step": 3575 }, { "epoch": 2.706527909176916, "grad_norm": 2.2051377296447754, "learning_rate": 4.807754585587696e-07, "loss": 1.0773, "step": 3576 }, { "epoch": 2.7072847682119203, "grad_norm": 1.9983853101730347, "learning_rate": 4.783241728776997e-07, "loss": 1.0325, "step": 3577 }, { "epoch": 2.708041627246925, "grad_norm": 1.9599753618240356, "learning_rate": 4.7587902536824234e-07, "loss": 1.0701, "step": 3578 }, { "epoch": 2.70879848628193, "grad_norm": 2.0052897930145264, "learning_rate": 4.7344001760650454e-07, "loss": 1.0672, "step": 3579 }, { "epoch": 2.709555345316935, "grad_norm": 3.11828351020813, "learning_rate": 4.710071511646324e-07, "loss": 1.0932, "step": 3580 }, { "epoch": 2.7103122043519394, "grad_norm": 2.1355981826782227, "learning_rate": 4.685804276108169e-07, "loss": 1.1196, "step": 3581 }, { "epoch": 2.7110690633869443, "grad_norm": 2.2099850177764893, "learning_rate": 4.6615984850928456e-07, "loss": 1.1028, "step": 3582 }, { "epoch": 2.7118259224219488, "grad_norm": 1.9474663734436035, "learning_rate": 4.637454154203033e-07, "loss": 1.07, "step": 3583 }, { "epoch": 2.7125827814569536, "grad_norm": 2.1069188117980957, "learning_rate": 4.613371299001815e-07, "loss": 1.0899, "step": 3584 }, { "epoch": 2.7133396404919585, "grad_norm": 2.008517265319824, "learning_rate": 4.58934993501263e-07, "loss": 1.0885, "step": 3585 }, { "epoch": 2.714096499526963, "grad_norm": 1.88406503200531, "learning_rate": 4.5653900777192763e-07, "loss": 1.0659, "step": 3586 }, { "epoch": 2.714853358561968, "grad_norm": 2.1920740604400635, "learning_rate": 4.5414917425659094e-07, "loss": 1.1038, "step": 3587 }, { "epoch": 2.7156102175969723, "grad_norm": 2.047375440597534, "learning_rate": 4.5176549449570765e-07, "loss": 1.0542, "step": 3588 }, { "epoch": 2.716367076631977, "grad_norm": 1.9768850803375244, "learning_rate": 4.4938797002575485e-07, "loss": 1.0511, "step": 3589 }, { "epoch": 2.717123935666982, "grad_norm": 2.005725145339966, "learning_rate": 4.4701660237925116e-07, "loss": 1.0859, "step": 3590 }, { "epoch": 2.717880794701987, "grad_norm": 2.0299482345581055, "learning_rate": 4.446513930847431e-07, "loss": 1.052, "step": 3591 }, { "epoch": 2.7186376537369914, "grad_norm": 2.217197895050049, "learning_rate": 4.4229234366681054e-07, "loss": 1.083, "step": 3592 }, { "epoch": 2.7193945127719963, "grad_norm": 2.225231170654297, "learning_rate": 4.399394556460618e-07, "loss": 1.1048, "step": 3593 }, { "epoch": 2.7201513718070007, "grad_norm": 2.035879135131836, "learning_rate": 4.375927305391286e-07, "loss": 1.0064, "step": 3594 }, { "epoch": 2.7209082308420056, "grad_norm": 2.046074628829956, "learning_rate": 4.352521698586783e-07, "loss": 1.0649, "step": 3595 }, { "epoch": 2.7216650898770105, "grad_norm": 2.068490743637085, "learning_rate": 4.329177751133964e-07, "loss": 1.0509, "step": 3596 }, { "epoch": 2.7224219489120154, "grad_norm": 2.191215991973877, "learning_rate": 4.305895478079998e-07, "loss": 1.0413, "step": 3597 }, { "epoch": 2.72317880794702, "grad_norm": 2.310241937637329, "learning_rate": 4.2826748944323e-07, "loss": 1.0864, "step": 3598 }, { "epoch": 2.7239356669820247, "grad_norm": 2.196274757385254, "learning_rate": 4.2595160151584996e-07, "loss": 1.0302, "step": 3599 }, { "epoch": 2.724692526017029, "grad_norm": 2.0941972732543945, "learning_rate": 4.2364188551864284e-07, "loss": 1.0968, "step": 3600 }, { "epoch": 2.725449385052034, "grad_norm": 2.1524224281311035, "learning_rate": 4.213383429404197e-07, "loss": 1.0739, "step": 3601 }, { "epoch": 2.726206244087039, "grad_norm": 2.400557518005371, "learning_rate": 4.190409752660077e-07, "loss": 1.1176, "step": 3602 }, { "epoch": 2.7269631031220434, "grad_norm": 2.0198590755462646, "learning_rate": 4.16749783976255e-07, "loss": 1.0351, "step": 3603 }, { "epoch": 2.7277199621570483, "grad_norm": 2.151195526123047, "learning_rate": 4.144647705480291e-07, "loss": 1.0867, "step": 3604 }, { "epoch": 2.7284768211920527, "grad_norm": 1.927239179611206, "learning_rate": 4.1218593645421344e-07, "loss": 1.0605, "step": 3605 }, { "epoch": 2.7292336802270576, "grad_norm": 2.175260066986084, "learning_rate": 4.099132831637103e-07, "loss": 1.0312, "step": 3606 }, { "epoch": 2.7299905392620625, "grad_norm": 2.2161762714385986, "learning_rate": 4.0764681214143794e-07, "loss": 1.0217, "step": 3607 }, { "epoch": 2.7307473982970674, "grad_norm": 2.06466007232666, "learning_rate": 4.053865248483281e-07, "loss": 1.0851, "step": 3608 }, { "epoch": 2.731504257332072, "grad_norm": 2.1965982913970947, "learning_rate": 4.031324227413297e-07, "loss": 1.0758, "step": 3609 }, { "epoch": 2.7322611163670767, "grad_norm": 2.31892728805542, "learning_rate": 4.008845072734016e-07, "loss": 1.1159, "step": 3610 }, { "epoch": 2.733017975402081, "grad_norm": 2.0228688716888428, "learning_rate": 3.986427798935131e-07, "loss": 1.0769, "step": 3611 }, { "epoch": 2.733774834437086, "grad_norm": 2.0157992839813232, "learning_rate": 3.964072420466503e-07, "loss": 1.0597, "step": 3612 }, { "epoch": 2.734531693472091, "grad_norm": 1.9818907976150513, "learning_rate": 3.9417789517380527e-07, "loss": 1.0732, "step": 3613 }, { "epoch": 2.735288552507096, "grad_norm": 2.1533520221710205, "learning_rate": 3.919547407119824e-07, "loss": 1.063, "step": 3614 }, { "epoch": 2.7360454115421002, "grad_norm": 2.073683738708496, "learning_rate": 3.897377800941943e-07, "loss": 1.0551, "step": 3615 }, { "epoch": 2.736802270577105, "grad_norm": 2.021272897720337, "learning_rate": 3.875270147494558e-07, "loss": 1.1027, "step": 3616 }, { "epoch": 2.7375591296121096, "grad_norm": 2.308957099914551, "learning_rate": 3.853224461027956e-07, "loss": 1.076, "step": 3617 }, { "epoch": 2.7383159886471145, "grad_norm": 2.239806652069092, "learning_rate": 3.8312407557524466e-07, "loss": 1.0998, "step": 3618 }, { "epoch": 2.7390728476821193, "grad_norm": 2.1331143379211426, "learning_rate": 3.8093190458383777e-07, "loss": 1.1259, "step": 3619 }, { "epoch": 2.739829706717124, "grad_norm": 2.0615665912628174, "learning_rate": 3.7874593454161647e-07, "loss": 1.0191, "step": 3620 }, { "epoch": 2.7405865657521287, "grad_norm": 1.9834305047988892, "learning_rate": 3.7656616685762473e-07, "loss": 1.0553, "step": 3621 }, { "epoch": 2.7413434247871336, "grad_norm": 2.1964480876922607, "learning_rate": 3.7439260293690597e-07, "loss": 1.0388, "step": 3622 }, { "epoch": 2.742100283822138, "grad_norm": 1.9631261825561523, "learning_rate": 3.722252441805057e-07, "loss": 1.0515, "step": 3623 }, { "epoch": 2.742857142857143, "grad_norm": 2.0425281524658203, "learning_rate": 3.7006409198547004e-07, "loss": 1.0117, "step": 3624 }, { "epoch": 2.7436140018921478, "grad_norm": 2.2030279636383057, "learning_rate": 3.6790914774484625e-07, "loss": 1.0627, "step": 3625 }, { "epoch": 2.744370860927152, "grad_norm": 2.0677294731140137, "learning_rate": 3.6576041284767873e-07, "loss": 1.0504, "step": 3626 }, { "epoch": 2.745127719962157, "grad_norm": 1.951145887374878, "learning_rate": 3.6361788867900865e-07, "loss": 1.0804, "step": 3627 }, { "epoch": 2.7458845789971615, "grad_norm": 2.083099126815796, "learning_rate": 3.614815766198731e-07, "loss": 1.0902, "step": 3628 }, { "epoch": 2.7466414380321664, "grad_norm": 2.0568675994873047, "learning_rate": 3.593514780473093e-07, "loss": 1.0659, "step": 3629 }, { "epoch": 2.7473982970671713, "grad_norm": 2.0525779724121094, "learning_rate": 3.572275943343428e-07, "loss": 1.0885, "step": 3630 }, { "epoch": 2.748155156102176, "grad_norm": 2.105832576751709, "learning_rate": 3.55109926849998e-07, "loss": 1.0754, "step": 3631 }, { "epoch": 2.7489120151371806, "grad_norm": 2.1376736164093018, "learning_rate": 3.5299847695929306e-07, "loss": 1.1257, "step": 3632 }, { "epoch": 2.7496688741721855, "grad_norm": 1.8944578170776367, "learning_rate": 3.508932460232331e-07, "loss": 1.0465, "step": 3633 }, { "epoch": 2.75042573320719, "grad_norm": 2.0414884090423584, "learning_rate": 3.4879423539882017e-07, "loss": 1.0621, "step": 3634 }, { "epoch": 2.751182592242195, "grad_norm": 2.1808700561523438, "learning_rate": 3.467014464390431e-07, "loss": 1.0513, "step": 3635 }, { "epoch": 2.7519394512771997, "grad_norm": 2.060415506362915, "learning_rate": 3.446148804928836e-07, "loss": 1.0927, "step": 3636 }, { "epoch": 2.752696310312204, "grad_norm": 1.9510079622268677, "learning_rate": 3.425345389053098e-07, "loss": 1.0684, "step": 3637 }, { "epoch": 2.753453169347209, "grad_norm": 1.9349720478057861, "learning_rate": 3.4046042301727504e-07, "loss": 1.0437, "step": 3638 }, { "epoch": 2.754210028382214, "grad_norm": 1.9200588464736938, "learning_rate": 3.383925341657259e-07, "loss": 1.0417, "step": 3639 }, { "epoch": 2.7549668874172184, "grad_norm": 1.9135462045669556, "learning_rate": 3.363308736835918e-07, "loss": 1.0593, "step": 3640 }, { "epoch": 2.7557237464522233, "grad_norm": 2.030207633972168, "learning_rate": 3.342754428997865e-07, "loss": 1.0311, "step": 3641 }, { "epoch": 2.756480605487228, "grad_norm": 2.0563161373138428, "learning_rate": 3.3222624313920995e-07, "loss": 1.1101, "step": 3642 }, { "epoch": 2.7572374645222326, "grad_norm": 2.1681125164031982, "learning_rate": 3.301832757227478e-07, "loss": 1.1007, "step": 3643 }, { "epoch": 2.7579943235572375, "grad_norm": 1.9184566736221313, "learning_rate": 3.281465419672603e-07, "loss": 1.0738, "step": 3644 }, { "epoch": 2.758751182592242, "grad_norm": 2.2246665954589844, "learning_rate": 3.261160431855982e-07, "loss": 1.0967, "step": 3645 }, { "epoch": 2.759508041627247, "grad_norm": 2.213003396987915, "learning_rate": 3.240917806865891e-07, "loss": 1.1179, "step": 3646 }, { "epoch": 2.7602649006622517, "grad_norm": 2.001859426498413, "learning_rate": 3.2207375577504196e-07, "loss": 1.0601, "step": 3647 }, { "epoch": 2.7610217596972566, "grad_norm": 1.9976732730865479, "learning_rate": 3.2006196975174716e-07, "loss": 1.0809, "step": 3648 }, { "epoch": 2.761778618732261, "grad_norm": 2.0903263092041016, "learning_rate": 3.1805642391346757e-07, "loss": 1.1196, "step": 3649 }, { "epoch": 2.762535477767266, "grad_norm": 2.2454066276550293, "learning_rate": 3.160571195529498e-07, "loss": 1.1021, "step": 3650 }, { "epoch": 2.7632923368022704, "grad_norm": 2.1280694007873535, "learning_rate": 3.1406405795891286e-07, "loss": 1.1027, "step": 3651 }, { "epoch": 2.7640491958372753, "grad_norm": 2.1310126781463623, "learning_rate": 3.1207724041605493e-07, "loss": 0.9978, "step": 3652 }, { "epoch": 2.76480605487228, "grad_norm": 2.2121293544769287, "learning_rate": 3.1009666820505004e-07, "loss": 1.0708, "step": 3653 }, { "epoch": 2.765562913907285, "grad_norm": 2.256673812866211, "learning_rate": 3.081223426025437e-07, "loss": 1.094, "step": 3654 }, { "epoch": 2.7663197729422895, "grad_norm": 2.2821056842803955, "learning_rate": 3.0615426488115385e-07, "loss": 1.0542, "step": 3655 }, { "epoch": 2.7670766319772944, "grad_norm": 2.1040828227996826, "learning_rate": 3.0419243630947764e-07, "loss": 1.0439, "step": 3656 }, { "epoch": 2.767833491012299, "grad_norm": 2.050218343734741, "learning_rate": 3.022368581520758e-07, "loss": 1.0747, "step": 3657 }, { "epoch": 2.7685903500473037, "grad_norm": 1.962795376777649, "learning_rate": 3.0028753166948504e-07, "loss": 1.1227, "step": 3658 }, { "epoch": 2.7693472090823086, "grad_norm": 2.256727933883667, "learning_rate": 2.983444581182144e-07, "loss": 1.123, "step": 3659 }, { "epoch": 2.770104068117313, "grad_norm": 2.0236082077026367, "learning_rate": 2.964076387507367e-07, "loss": 1.0793, "step": 3660 }, { "epoch": 2.770860927152318, "grad_norm": 2.2242016792297363, "learning_rate": 2.944770748154961e-07, "loss": 1.0849, "step": 3661 }, { "epoch": 2.7716177861873224, "grad_norm": 1.8702110052108765, "learning_rate": 2.9255276755690594e-07, "loss": 1.0598, "step": 3662 }, { "epoch": 2.7723746452223272, "grad_norm": 2.0809333324432373, "learning_rate": 2.9063471821534544e-07, "loss": 1.1008, "step": 3663 }, { "epoch": 2.773131504257332, "grad_norm": 2.0802369117736816, "learning_rate": 2.8872292802715856e-07, "loss": 1.0757, "step": 3664 }, { "epoch": 2.773888363292337, "grad_norm": 2.361516237258911, "learning_rate": 2.868173982246573e-07, "loss": 1.1079, "step": 3665 }, { "epoch": 2.7746452223273415, "grad_norm": 2.069173574447632, "learning_rate": 2.8491813003611614e-07, "loss": 1.0559, "step": 3666 }, { "epoch": 2.7754020813623463, "grad_norm": 1.9263156652450562, "learning_rate": 2.830251246857745e-07, "loss": 1.054, "step": 3667 }, { "epoch": 2.776158940397351, "grad_norm": 1.9880831241607666, "learning_rate": 2.811383833938352e-07, "loss": 1.0915, "step": 3668 }, { "epoch": 2.7769157994323557, "grad_norm": 1.977330207824707, "learning_rate": 2.7925790737646375e-07, "loss": 1.0582, "step": 3669 }, { "epoch": 2.7776726584673606, "grad_norm": 2.2954440116882324, "learning_rate": 2.7738369784578694e-07, "loss": 1.093, "step": 3670 }, { "epoch": 2.7784295175023654, "grad_norm": 1.9425572156906128, "learning_rate": 2.755157560098875e-07, "loss": 1.0558, "step": 3671 }, { "epoch": 2.77918637653737, "grad_norm": 1.9914302825927734, "learning_rate": 2.736540830728152e-07, "loss": 1.1168, "step": 3672 }, { "epoch": 2.7799432355723748, "grad_norm": 2.00032114982605, "learning_rate": 2.717986802345765e-07, "loss": 1.0692, "step": 3673 }, { "epoch": 2.780700094607379, "grad_norm": 2.004713535308838, "learning_rate": 2.6994954869113416e-07, "loss": 1.0587, "step": 3674 }, { "epoch": 2.781456953642384, "grad_norm": 2.0385048389434814, "learning_rate": 2.6810668963441194e-07, "loss": 1.064, "step": 3675 }, { "epoch": 2.782213812677389, "grad_norm": 1.966386079788208, "learning_rate": 2.662701042522874e-07, "loss": 1.0086, "step": 3676 }, { "epoch": 2.7829706717123934, "grad_norm": 2.2672669887542725, "learning_rate": 2.644397937285963e-07, "loss": 1.0867, "step": 3677 }, { "epoch": 2.7837275307473983, "grad_norm": 1.9181667566299438, "learning_rate": 2.626157592431278e-07, "loss": 1.0969, "step": 3678 }, { "epoch": 2.7844843897824028, "grad_norm": 2.0945870876312256, "learning_rate": 2.607980019716272e-07, "loss": 1.0493, "step": 3679 }, { "epoch": 2.7852412488174076, "grad_norm": 1.9859826564788818, "learning_rate": 2.589865230857932e-07, "loss": 1.0695, "step": 3680 }, { "epoch": 2.7859981078524125, "grad_norm": 1.9504257440567017, "learning_rate": 2.5718132375327933e-07, "loss": 1.0653, "step": 3681 }, { "epoch": 2.7867549668874174, "grad_norm": 1.9905445575714111, "learning_rate": 2.5538240513768625e-07, "loss": 1.0907, "step": 3682 }, { "epoch": 2.787511825922422, "grad_norm": 2.076355457305908, "learning_rate": 2.535897683985702e-07, "loss": 1.0448, "step": 3683 }, { "epoch": 2.7882686849574267, "grad_norm": 1.986864447593689, "learning_rate": 2.518034146914401e-07, "loss": 1.0296, "step": 3684 }, { "epoch": 2.789025543992431, "grad_norm": 1.9109041690826416, "learning_rate": 2.5002334516774865e-07, "loss": 1.0455, "step": 3685 }, { "epoch": 2.789782403027436, "grad_norm": 2.183528423309326, "learning_rate": 2.482495609749042e-07, "loss": 1.0824, "step": 3686 }, { "epoch": 2.790539262062441, "grad_norm": 2.090740203857422, "learning_rate": 2.4648206325626e-07, "loss": 1.0755, "step": 3687 }, { "epoch": 2.791296121097446, "grad_norm": 2.1614151000976562, "learning_rate": 2.447208531511184e-07, "loss": 1.0617, "step": 3688 }, { "epoch": 2.7920529801324503, "grad_norm": 1.9354277849197388, "learning_rate": 2.429659317947277e-07, "loss": 1.0702, "step": 3689 }, { "epoch": 2.792809839167455, "grad_norm": 2.077448606491089, "learning_rate": 2.412173003182842e-07, "loss": 1.0656, "step": 3690 }, { "epoch": 2.7935666982024596, "grad_norm": 1.9370477199554443, "learning_rate": 2.394749598489302e-07, "loss": 1.1324, "step": 3691 }, { "epoch": 2.7943235572374645, "grad_norm": 2.0902650356292725, "learning_rate": 2.3773891150975041e-07, "loss": 1.1173, "step": 3692 }, { "epoch": 2.7950804162724694, "grad_norm": 2.1968994140625, "learning_rate": 2.3600915641977443e-07, "loss": 1.1001, "step": 3693 }, { "epoch": 2.795837275307474, "grad_norm": 1.9441262483596802, "learning_rate": 2.342856956939765e-07, "loss": 1.0932, "step": 3694 }, { "epoch": 2.7965941343424787, "grad_norm": 2.0278730392456055, "learning_rate": 2.3256853044327348e-07, "loss": 1.1073, "step": 3695 }, { "epoch": 2.7973509933774836, "grad_norm": 1.9890429973602295, "learning_rate": 2.308576617745247e-07, "loss": 1.0403, "step": 3696 }, { "epoch": 2.798107852412488, "grad_norm": 1.9365586042404175, "learning_rate": 2.2915309079052886e-07, "loss": 1.0604, "step": 3697 }, { "epoch": 2.798864711447493, "grad_norm": 2.051670789718628, "learning_rate": 2.2745481859002917e-07, "loss": 1.0739, "step": 3698 }, { "epoch": 2.799621570482498, "grad_norm": 1.945073127746582, "learning_rate": 2.2576284626770157e-07, "loss": 1.0621, "step": 3699 }, { "epoch": 2.8003784295175023, "grad_norm": 2.0430312156677246, "learning_rate": 2.2407717491416676e-07, "loss": 1.058, "step": 3700 }, { "epoch": 2.801135288552507, "grad_norm": 2.074920177459717, "learning_rate": 2.2239780561598455e-07, "loss": 1.0765, "step": 3701 }, { "epoch": 2.8018921475875116, "grad_norm": 2.323629140853882, "learning_rate": 2.2072473945564961e-07, "loss": 1.1025, "step": 3702 }, { "epoch": 2.8026490066225165, "grad_norm": 1.8663524389266968, "learning_rate": 2.1905797751159689e-07, "loss": 1.0688, "step": 3703 }, { "epoch": 2.8034058656575214, "grad_norm": 2.141047239303589, "learning_rate": 2.1739752085819388e-07, "loss": 1.0787, "step": 3704 }, { "epoch": 2.8041627246925263, "grad_norm": 2.179725408554077, "learning_rate": 2.15743370565744e-07, "loss": 1.0848, "step": 3705 }, { "epoch": 2.8049195837275307, "grad_norm": 2.0025246143341064, "learning_rate": 2.1409552770048975e-07, "loss": 1.0256, "step": 3706 }, { "epoch": 2.8056764427625356, "grad_norm": 2.1321537494659424, "learning_rate": 2.124539933246042e-07, "loss": 1.1045, "step": 3707 }, { "epoch": 2.80643330179754, "grad_norm": 2.0465590953826904, "learning_rate": 2.108187684961972e-07, "loss": 1.1277, "step": 3708 }, { "epoch": 2.807190160832545, "grad_norm": 2.1039795875549316, "learning_rate": 2.091898542693078e-07, "loss": 1.0712, "step": 3709 }, { "epoch": 2.80794701986755, "grad_norm": 2.3281686305999756, "learning_rate": 2.0756725169391007e-07, "loss": 1.1043, "step": 3710 }, { "epoch": 2.8087038789025542, "grad_norm": 2.027113914489746, "learning_rate": 2.0595096181591037e-07, "loss": 1.1017, "step": 3711 }, { "epoch": 2.809460737937559, "grad_norm": 2.1701509952545166, "learning_rate": 2.04340985677141e-07, "loss": 1.0812, "step": 3712 }, { "epoch": 2.810217596972564, "grad_norm": 2.0328516960144043, "learning_rate": 2.0273732431537025e-07, "loss": 1.0951, "step": 3713 }, { "epoch": 2.8109744560075685, "grad_norm": 2.0888283252716064, "learning_rate": 2.0113997876429446e-07, "loss": 1.1183, "step": 3714 }, { "epoch": 2.8117313150425733, "grad_norm": 1.8546501398086548, "learning_rate": 1.9954895005353692e-07, "loss": 1.0742, "step": 3715 }, { "epoch": 2.8124881740775782, "grad_norm": 1.985583782196045, "learning_rate": 1.9796423920865021e-07, "loss": 1.1027, "step": 3716 }, { "epoch": 2.8132450331125827, "grad_norm": 2.0510141849517822, "learning_rate": 1.9638584725111498e-07, "loss": 1.0609, "step": 3717 }, { "epoch": 2.8140018921475876, "grad_norm": 2.359945058822632, "learning_rate": 1.9481377519834112e-07, "loss": 1.0421, "step": 3718 }, { "epoch": 2.814758751182592, "grad_norm": 2.107235908508301, "learning_rate": 1.9324802406365883e-07, "loss": 1.0114, "step": 3719 }, { "epoch": 2.815515610217597, "grad_norm": 1.9575122594833374, "learning_rate": 1.9168859485632866e-07, "loss": 1.0763, "step": 3720 }, { "epoch": 2.8162724692526018, "grad_norm": 2.15492582321167, "learning_rate": 1.901354885815348e-07, "loss": 1.0527, "step": 3721 }, { "epoch": 2.8170293282876067, "grad_norm": 2.049591302871704, "learning_rate": 1.8858870624038632e-07, "loss": 1.0314, "step": 3722 }, { "epoch": 2.817786187322611, "grad_norm": 2.623854875564575, "learning_rate": 1.8704824882991584e-07, "loss": 1.0589, "step": 3723 }, { "epoch": 2.818543046357616, "grad_norm": 1.8997153043746948, "learning_rate": 1.8551411734307744e-07, "loss": 1.0561, "step": 3724 }, { "epoch": 2.8192999053926204, "grad_norm": 2.2084269523620605, "learning_rate": 1.8398631276875118e-07, "loss": 1.0703, "step": 3725 }, { "epoch": 2.8200567644276253, "grad_norm": 1.8947069644927979, "learning_rate": 1.82464836091734e-07, "loss": 1.0454, "step": 3726 }, { "epoch": 2.82081362346263, "grad_norm": 1.8694313764572144, "learning_rate": 1.8094968829274663e-07, "loss": 1.0885, "step": 3727 }, { "epoch": 2.821570482497635, "grad_norm": 2.0910801887512207, "learning_rate": 1.7944087034843233e-07, "loss": 1.0546, "step": 3728 }, { "epoch": 2.8223273415326395, "grad_norm": 1.8177095651626587, "learning_rate": 1.7793838323135016e-07, "loss": 1.0405, "step": 3729 }, { "epoch": 2.8230842005676444, "grad_norm": 2.050400733947754, "learning_rate": 1.7644222790998186e-07, "loss": 1.0905, "step": 3730 }, { "epoch": 2.823841059602649, "grad_norm": 1.9098093509674072, "learning_rate": 1.7495240534872614e-07, "loss": 1.031, "step": 3731 }, { "epoch": 2.8245979186376537, "grad_norm": 2.1355783939361572, "learning_rate": 1.734689165078998e-07, "loss": 1.0944, "step": 3732 }, { "epoch": 2.8253547776726586, "grad_norm": 1.9840859174728394, "learning_rate": 1.7199176234373553e-07, "loss": 0.9905, "step": 3733 }, { "epoch": 2.826111636707663, "grad_norm": 2.0721471309661865, "learning_rate": 1.7052094380838532e-07, "loss": 1.059, "step": 3734 }, { "epoch": 2.826868495742668, "grad_norm": 2.345816135406494, "learning_rate": 1.69056461849917e-07, "loss": 1.0611, "step": 3735 }, { "epoch": 2.8276253547776724, "grad_norm": 2.2599689960479736, "learning_rate": 1.675983174123143e-07, "loss": 1.0965, "step": 3736 }, { "epoch": 2.8283822138126773, "grad_norm": 2.1439452171325684, "learning_rate": 1.6614651143547243e-07, "loss": 1.0312, "step": 3737 }, { "epoch": 2.829139072847682, "grad_norm": 2.0652458667755127, "learning_rate": 1.647010448552047e-07, "loss": 1.0469, "step": 3738 }, { "epoch": 2.829895931882687, "grad_norm": 2.0724799633026123, "learning_rate": 1.63261918603237e-07, "loss": 1.1211, "step": 3739 }, { "epoch": 2.8306527909176915, "grad_norm": 1.8345634937286377, "learning_rate": 1.618291336072078e-07, "loss": 1.0573, "step": 3740 }, { "epoch": 2.8314096499526964, "grad_norm": 1.9076229333877563, "learning_rate": 1.6040269079066806e-07, "loss": 1.0767, "step": 3741 }, { "epoch": 2.832166508987701, "grad_norm": 1.9758639335632324, "learning_rate": 1.5898259107308255e-07, "loss": 1.0678, "step": 3742 }, { "epoch": 2.8329233680227057, "grad_norm": 1.982330560684204, "learning_rate": 1.5756883536982296e-07, "loss": 1.1126, "step": 3743 }, { "epoch": 2.8336802270577106, "grad_norm": 1.9688644409179688, "learning_rate": 1.5616142459217799e-07, "loss": 1.1118, "step": 3744 }, { "epoch": 2.8344370860927155, "grad_norm": 2.031545639038086, "learning_rate": 1.5476035964734117e-07, "loss": 1.061, "step": 3745 }, { "epoch": 2.83519394512772, "grad_norm": 1.9219672679901123, "learning_rate": 1.5336564143841856e-07, "loss": 1.031, "step": 3746 }, { "epoch": 2.835950804162725, "grad_norm": 2.1881892681121826, "learning_rate": 1.5197727086442445e-07, "loss": 1.0689, "step": 3747 }, { "epoch": 2.8367076631977293, "grad_norm": 1.885879397392273, "learning_rate": 1.505952488202789e-07, "loss": 1.0866, "step": 3748 }, { "epoch": 2.837464522232734, "grad_norm": 2.17256760597229, "learning_rate": 1.492195761968146e-07, "loss": 1.0774, "step": 3749 }, { "epoch": 2.838221381267739, "grad_norm": 2.0141475200653076, "learning_rate": 1.4785025388076906e-07, "loss": 1.1078, "step": 3750 }, { "epoch": 2.8389782403027435, "grad_norm": 2.1120545864105225, "learning_rate": 1.4648728275478566e-07, "loss": 1.0698, "step": 3751 }, { "epoch": 2.8397350993377484, "grad_norm": 1.9993555545806885, "learning_rate": 1.451306636974159e-07, "loss": 1.0529, "step": 3752 }, { "epoch": 2.840491958372753, "grad_norm": 1.9042015075683594, "learning_rate": 1.4378039758311616e-07, "loss": 1.035, "step": 3753 }, { "epoch": 2.8412488174077577, "grad_norm": 2.0726895332336426, "learning_rate": 1.4243648528224414e-07, "loss": 1.0772, "step": 3754 }, { "epoch": 2.8420056764427626, "grad_norm": 2.004347085952759, "learning_rate": 1.4109892766106804e-07, "loss": 1.066, "step": 3755 }, { "epoch": 2.8427625354777675, "grad_norm": 2.1998095512390137, "learning_rate": 1.397677255817563e-07, "loss": 1.1011, "step": 3756 }, { "epoch": 2.843519394512772, "grad_norm": 1.9528348445892334, "learning_rate": 1.3844287990238113e-07, "loss": 1.105, "step": 3757 }, { "epoch": 2.844276253547777, "grad_norm": 2.1867001056671143, "learning_rate": 1.3712439147691946e-07, "loss": 1.0787, "step": 3758 }, { "epoch": 2.8450331125827812, "grad_norm": 2.0233795642852783, "learning_rate": 1.3581226115524753e-07, "loss": 1.0587, "step": 3759 }, { "epoch": 2.845789971617786, "grad_norm": 1.9928818941116333, "learning_rate": 1.345064897831441e-07, "loss": 1.0421, "step": 3760 }, { "epoch": 2.846546830652791, "grad_norm": 1.9571059942245483, "learning_rate": 1.3320707820229063e-07, "loss": 1.0569, "step": 3761 }, { "epoch": 2.847303689687796, "grad_norm": 2.076955795288086, "learning_rate": 1.3191402725026765e-07, "loss": 1.0854, "step": 3762 }, { "epoch": 2.8480605487228003, "grad_norm": 2.1233267784118652, "learning_rate": 1.3062733776055504e-07, "loss": 1.0457, "step": 3763 }, { "epoch": 2.8488174077578052, "grad_norm": 1.9417656660079956, "learning_rate": 1.2934701056253526e-07, "loss": 1.0308, "step": 3764 }, { "epoch": 2.8495742667928097, "grad_norm": 1.9117321968078613, "learning_rate": 1.2807304648148552e-07, "loss": 1.0519, "step": 3765 }, { "epoch": 2.8503311258278146, "grad_norm": 1.987637996673584, "learning_rate": 1.2680544633858457e-07, "loss": 1.0602, "step": 3766 }, { "epoch": 2.8510879848628194, "grad_norm": 2.072512626647949, "learning_rate": 1.2554421095090923e-07, "loss": 1.1063, "step": 3767 }, { "epoch": 2.851844843897824, "grad_norm": 2.4176509380340576, "learning_rate": 1.2428934113143005e-07, "loss": 1.0889, "step": 3768 }, { "epoch": 2.8526017029328288, "grad_norm": 2.25588059425354, "learning_rate": 1.2304083768902016e-07, "loss": 1.0894, "step": 3769 }, { "epoch": 2.853358561967833, "grad_norm": 1.9961562156677246, "learning_rate": 1.2179870142844305e-07, "loss": 1.07, "step": 3770 }, { "epoch": 2.854115421002838, "grad_norm": 2.0790538787841797, "learning_rate": 1.2056293315036139e-07, "loss": 1.1308, "step": 3771 }, { "epoch": 2.854872280037843, "grad_norm": 2.107841968536377, "learning_rate": 1.1933353365133393e-07, "loss": 1.1053, "step": 3772 }, { "epoch": 2.855629139072848, "grad_norm": 1.9669723510742188, "learning_rate": 1.1811050372381292e-07, "loss": 1.1049, "step": 3773 }, { "epoch": 2.8563859981078523, "grad_norm": 1.9607486724853516, "learning_rate": 1.1689384415614223e-07, "loss": 1.0817, "step": 3774 }, { "epoch": 2.857142857142857, "grad_norm": 1.8808550834655762, "learning_rate": 1.1568355573256491e-07, "loss": 1.0328, "step": 3775 }, { "epoch": 2.8578997161778616, "grad_norm": 2.0765459537506104, "learning_rate": 1.1447963923321327e-07, "loss": 1.0449, "step": 3776 }, { "epoch": 2.8586565752128665, "grad_norm": 1.9006658792495728, "learning_rate": 1.1328209543411224e-07, "loss": 1.0046, "step": 3777 }, { "epoch": 2.8594134342478714, "grad_norm": 2.1098666191101074, "learning_rate": 1.1209092510718261e-07, "loss": 1.0837, "step": 3778 }, { "epoch": 2.8601702932828763, "grad_norm": 2.088935375213623, "learning_rate": 1.1090612902023337e-07, "loss": 1.0702, "step": 3779 }, { "epoch": 2.8609271523178808, "grad_norm": 2.2443082332611084, "learning_rate": 1.0972770793696717e-07, "loss": 1.0864, "step": 3780 }, { "epoch": 2.8616840113528856, "grad_norm": 2.380600929260254, "learning_rate": 1.0855566261697372e-07, "loss": 1.073, "step": 3781 }, { "epoch": 2.86244087038789, "grad_norm": 2.0619399547576904, "learning_rate": 1.073899938157375e-07, "loss": 1.0486, "step": 3782 }, { "epoch": 2.863197729422895, "grad_norm": 1.9430749416351318, "learning_rate": 1.0623070228463008e-07, "loss": 1.0664, "step": 3783 }, { "epoch": 2.8639545884579, "grad_norm": 1.9037846326828003, "learning_rate": 1.0507778877091445e-07, "loss": 1.041, "step": 3784 }, { "epoch": 2.8647114474929043, "grad_norm": 1.8566458225250244, "learning_rate": 1.0393125401773843e-07, "loss": 1.066, "step": 3785 }, { "epoch": 2.865468306527909, "grad_norm": 1.8834096193313599, "learning_rate": 1.027910987641447e-07, "loss": 1.0902, "step": 3786 }, { "epoch": 2.866225165562914, "grad_norm": 2.1945738792419434, "learning_rate": 1.0165732374505733e-07, "loss": 1.0731, "step": 3787 }, { "epoch": 2.8669820245979185, "grad_norm": 2.057518720626831, "learning_rate": 1.0052992969128971e-07, "loss": 1.0807, "step": 3788 }, { "epoch": 2.8677388836329234, "grad_norm": 1.897512435913086, "learning_rate": 9.940891732954447e-08, "loss": 1.0146, "step": 3789 }, { "epoch": 2.8684957426679283, "grad_norm": 1.9884440898895264, "learning_rate": 9.829428738240904e-08, "loss": 1.0896, "step": 3790 }, { "epoch": 2.8692526017029327, "grad_norm": 2.2165613174438477, "learning_rate": 9.718604056835573e-08, "loss": 1.014, "step": 3791 }, { "epoch": 2.8700094607379376, "grad_norm": 2.517817258834839, "learning_rate": 9.608417760174488e-08, "loss": 1.0497, "step": 3792 }, { "epoch": 2.870766319772942, "grad_norm": 1.8578647375106812, "learning_rate": 9.498869919281952e-08, "loss": 1.064, "step": 3793 }, { "epoch": 2.871523178807947, "grad_norm": 2.168428897857666, "learning_rate": 9.389960604770966e-08, "loss": 1.1338, "step": 3794 }, { "epoch": 2.872280037842952, "grad_norm": 1.902740716934204, "learning_rate": 9.281689886842575e-08, "loss": 1.0833, "step": 3795 }, { "epoch": 2.8730368968779567, "grad_norm": 2.10799503326416, "learning_rate": 9.174057835286632e-08, "loss": 1.0498, "step": 3796 }, { "epoch": 2.873793755912961, "grad_norm": 1.9497560262680054, "learning_rate": 9.067064519481139e-08, "loss": 1.0824, "step": 3797 }, { "epoch": 2.874550614947966, "grad_norm": 2.052687406539917, "learning_rate": 8.96071000839214e-08, "loss": 1.0838, "step": 3798 }, { "epoch": 2.8753074739829705, "grad_norm": 2.033168315887451, "learning_rate": 8.854994370574378e-08, "loss": 1.0696, "step": 3799 }, { "epoch": 2.8760643330179754, "grad_norm": 1.928364872932434, "learning_rate": 8.749917674170415e-08, "loss": 1.0824, "step": 3800 }, { "epoch": 2.8768211920529803, "grad_norm": 1.9337732791900635, "learning_rate": 8.645479986911066e-08, "loss": 1.117, "step": 3801 }, { "epoch": 2.8775780510879847, "grad_norm": 1.929337501525879, "learning_rate": 8.541681376115416e-08, "loss": 1.0448, "step": 3802 }, { "epoch": 2.8783349101229896, "grad_norm": 2.0957815647125244, "learning_rate": 8.438521908690244e-08, "loss": 1.0833, "step": 3803 }, { "epoch": 2.8790917691579945, "grad_norm": 2.090304136276245, "learning_rate": 8.336001651130706e-08, "loss": 1.0567, "step": 3804 }, { "epoch": 2.879848628192999, "grad_norm": 2.115295648574829, "learning_rate": 8.234120669519771e-08, "loss": 1.031, "step": 3805 }, { "epoch": 2.880605487228004, "grad_norm": 1.9636808633804321, "learning_rate": 8.132879029528445e-08, "loss": 1.0494, "step": 3806 }, { "epoch": 2.8813623462630087, "grad_norm": 1.9101999998092651, "learning_rate": 8.03227679641533e-08, "loss": 1.0571, "step": 3807 }, { "epoch": 2.882119205298013, "grad_norm": 1.9551316499710083, "learning_rate": 7.932314035027393e-08, "loss": 1.0658, "step": 3808 }, { "epoch": 2.882876064333018, "grad_norm": 2.1239876747131348, "learning_rate": 7.832990809798869e-08, "loss": 1.0788, "step": 3809 }, { "epoch": 2.8836329233680225, "grad_norm": 1.9690558910369873, "learning_rate": 7.734307184752134e-08, "loss": 1.0772, "step": 3810 }, { "epoch": 2.8843897824030273, "grad_norm": 2.071542263031006, "learning_rate": 7.636263223496941e-08, "loss": 1.0839, "step": 3811 }, { "epoch": 2.8851466414380322, "grad_norm": 2.1702964305877686, "learning_rate": 7.538858989231189e-08, "loss": 1.0452, "step": 3812 }, { "epoch": 2.885903500473037, "grad_norm": 2.1600115299224854, "learning_rate": 7.442094544740037e-08, "loss": 1.133, "step": 3813 }, { "epoch": 2.8866603595080416, "grad_norm": 1.943969964981079, "learning_rate": 7.34596995239646e-08, "loss": 1.0342, "step": 3814 }, { "epoch": 2.8874172185430464, "grad_norm": 2.029170513153076, "learning_rate": 7.250485274160693e-08, "loss": 1.0983, "step": 3815 }, { "epoch": 2.888174077578051, "grad_norm": 2.1345629692077637, "learning_rate": 7.1556405715809e-08, "loss": 1.0854, "step": 3816 }, { "epoch": 2.8889309366130558, "grad_norm": 1.8675469160079956, "learning_rate": 7.061435905792389e-08, "loss": 1.0661, "step": 3817 }, { "epoch": 2.8896877956480607, "grad_norm": 2.0254111289978027, "learning_rate": 6.967871337518176e-08, "loss": 1.103, "step": 3818 }, { "epoch": 2.8904446546830656, "grad_norm": 2.162344455718994, "learning_rate": 6.874946927068538e-08, "loss": 1.0878, "step": 3819 }, { "epoch": 2.89120151371807, "grad_norm": 2.2124130725860596, "learning_rate": 6.782662734341012e-08, "loss": 1.0599, "step": 3820 }, { "epoch": 2.891958372753075, "grad_norm": 2.1409800052642822, "learning_rate": 6.691018818820837e-08, "loss": 1.0525, "step": 3821 }, { "epoch": 2.8927152317880793, "grad_norm": 2.1800687313079834, "learning_rate": 6.600015239579959e-08, "loss": 1.0602, "step": 3822 }, { "epoch": 2.893472090823084, "grad_norm": 2.0903069972991943, "learning_rate": 6.50965205527814e-08, "loss": 1.0851, "step": 3823 }, { "epoch": 2.894228949858089, "grad_norm": 1.9317938089370728, "learning_rate": 6.419929324162068e-08, "loss": 1.0319, "step": 3824 }, { "epoch": 2.8949858088930935, "grad_norm": 2.0327014923095703, "learning_rate": 6.330847104065472e-08, "loss": 1.1128, "step": 3825 }, { "epoch": 2.8957426679280984, "grad_norm": 2.1695809364318848, "learning_rate": 6.242405452409559e-08, "loss": 1.0591, "step": 3826 }, { "epoch": 2.896499526963103, "grad_norm": 2.077954053878784, "learning_rate": 6.154604426202468e-08, "loss": 1.0295, "step": 3827 }, { "epoch": 2.8972563859981078, "grad_norm": 2.0263519287109375, "learning_rate": 6.067444082039482e-08, "loss": 1.0147, "step": 3828 }, { "epoch": 2.8980132450331126, "grad_norm": 2.1431772708892822, "learning_rate": 5.980924476102595e-08, "loss": 1.0512, "step": 3829 }, { "epoch": 2.8987701040681175, "grad_norm": 1.9561032056808472, "learning_rate": 5.895045664161168e-08, "loss": 1.0426, "step": 3830 }, { "epoch": 2.899526963103122, "grad_norm": 2.133995532989502, "learning_rate": 5.8098077015713814e-08, "loss": 1.0365, "step": 3831 }, { "epoch": 2.900283822138127, "grad_norm": 2.5247886180877686, "learning_rate": 5.7252106432762304e-08, "loss": 1.1153, "step": 3832 }, { "epoch": 2.9010406811731313, "grad_norm": 1.9548890590667725, "learning_rate": 5.6412545438057476e-08, "loss": 1.0739, "step": 3833 }, { "epoch": 2.901797540208136, "grad_norm": 1.98203444480896, "learning_rate": 5.557939457276783e-08, "loss": 1.0844, "step": 3834 }, { "epoch": 2.902554399243141, "grad_norm": 2.1283376216888428, "learning_rate": 5.475265437393116e-08, "loss": 1.0675, "step": 3835 }, { "epoch": 2.903311258278146, "grad_norm": 1.917360782623291, "learning_rate": 5.393232537444783e-08, "loss": 1.0464, "step": 3836 }, { "epoch": 2.9040681173131504, "grad_norm": 1.9345555305480957, "learning_rate": 5.3118408103091954e-08, "loss": 1.0937, "step": 3837 }, { "epoch": 2.9048249763481553, "grad_norm": 2.1080758571624756, "learning_rate": 5.2310903084502445e-08, "loss": 1.0984, "step": 3838 }, { "epoch": 2.9055818353831597, "grad_norm": 2.0316121578216553, "learning_rate": 5.150981083918309e-08, "loss": 1.105, "step": 3839 }, { "epoch": 2.9063386944181646, "grad_norm": 2.1214966773986816, "learning_rate": 5.0715131883506914e-08, "loss": 1.0481, "step": 3840 }, { "epoch": 2.9070955534531695, "grad_norm": 2.003058433532715, "learning_rate": 4.99268667297129e-08, "loss": 1.0848, "step": 3841 }, { "epoch": 2.907852412488174, "grad_norm": 2.0405402183532715, "learning_rate": 4.9145015885902656e-08, "loss": 1.1065, "step": 3842 }, { "epoch": 2.908609271523179, "grad_norm": 2.1864330768585205, "learning_rate": 4.836957985604592e-08, "loss": 1.1217, "step": 3843 }, { "epoch": 2.9093661305581833, "grad_norm": 1.8275071382522583, "learning_rate": 4.7600559139976164e-08, "loss": 1.0634, "step": 3844 }, { "epoch": 2.910122989593188, "grad_norm": 2.006591320037842, "learning_rate": 4.683795423339395e-08, "loss": 1.0702, "step": 3845 }, { "epoch": 2.910879848628193, "grad_norm": 2.4923205375671387, "learning_rate": 4.608176562786352e-08, "loss": 1.0633, "step": 3846 }, { "epoch": 2.911636707663198, "grad_norm": 1.8209044933319092, "learning_rate": 4.533199381080951e-08, "loss": 1.0856, "step": 3847 }, { "epoch": 2.9123935666982024, "grad_norm": 2.0942399501800537, "learning_rate": 4.458863926552586e-08, "loss": 1.0936, "step": 3848 }, { "epoch": 2.9131504257332073, "grad_norm": 2.149657964706421, "learning_rate": 4.385170247116687e-08, "loss": 1.0472, "step": 3849 }, { "epoch": 2.9139072847682117, "grad_norm": 1.905176043510437, "learning_rate": 4.3121183902750584e-08, "loss": 1.0524, "step": 3850 }, { "epoch": 2.9146641438032166, "grad_norm": 2.1164419651031494, "learning_rate": 4.2397084031158755e-08, "loss": 1.0406, "step": 3851 }, { "epoch": 2.9154210028382215, "grad_norm": 1.955041527748108, "learning_rate": 4.1679403323133525e-08, "loss": 1.0788, "step": 3852 }, { "epoch": 2.9161778618732264, "grad_norm": 2.0062403678894043, "learning_rate": 4.096814224128301e-08, "loss": 1.1013, "step": 3853 }, { "epoch": 2.916934720908231, "grad_norm": 2.1404199600219727, "learning_rate": 4.0263301244073465e-08, "loss": 1.1179, "step": 3854 }, { "epoch": 2.9176915799432357, "grad_norm": 1.968444585800171, "learning_rate": 3.9564880785834875e-08, "loss": 1.0376, "step": 3855 }, { "epoch": 2.91844843897824, "grad_norm": 1.9066696166992188, "learning_rate": 3.887288131676096e-08, "loss": 1.0348, "step": 3856 }, { "epoch": 2.919205298013245, "grad_norm": 2.04758620262146, "learning_rate": 3.818730328290026e-08, "loss": 1.0696, "step": 3857 }, { "epoch": 2.91996215704825, "grad_norm": 1.8712373971939087, "learning_rate": 3.750814712616839e-08, "loss": 1.0559, "step": 3858 }, { "epoch": 2.9207190160832543, "grad_norm": 1.9164494276046753, "learning_rate": 3.6835413284338016e-08, "loss": 1.0717, "step": 3859 }, { "epoch": 2.9214758751182592, "grad_norm": 1.9720449447631836, "learning_rate": 3.616910219104442e-08, "loss": 1.1104, "step": 3860 }, { "epoch": 2.9222327341532637, "grad_norm": 2.1905975341796875, "learning_rate": 3.5509214275779944e-08, "loss": 1.1058, "step": 3861 }, { "epoch": 2.9229895931882686, "grad_norm": 1.912367582321167, "learning_rate": 3.4855749963898434e-08, "loss": 1.0694, "step": 3862 }, { "epoch": 2.9237464522232735, "grad_norm": 2.054760456085205, "learning_rate": 3.420870967661412e-08, "loss": 1.0661, "step": 3863 }, { "epoch": 2.9245033112582783, "grad_norm": 2.100724220275879, "learning_rate": 3.3568093830998316e-08, "loss": 1.0685, "step": 3864 }, { "epoch": 2.925260170293283, "grad_norm": 1.9942377805709839, "learning_rate": 3.2933902839982706e-08, "loss": 1.0362, "step": 3865 }, { "epoch": 2.9260170293282877, "grad_norm": 1.9845491647720337, "learning_rate": 3.230613711235715e-08, "loss": 1.0948, "step": 3866 }, { "epoch": 2.926773888363292, "grad_norm": 1.9502067565917969, "learning_rate": 3.168479705276969e-08, "loss": 1.0292, "step": 3867 }, { "epoch": 2.927530747398297, "grad_norm": 2.2125320434570312, "learning_rate": 3.106988306172764e-08, "loss": 1.0965, "step": 3868 }, { "epoch": 2.928287606433302, "grad_norm": 2.0215206146240234, "learning_rate": 3.046139553559317e-08, "loss": 1.0451, "step": 3869 }, { "epoch": 2.9290444654683068, "grad_norm": 2.161459445953369, "learning_rate": 2.985933486658992e-08, "loss": 1.0826, "step": 3870 }, { "epoch": 2.929801324503311, "grad_norm": 2.112816572189331, "learning_rate": 2.926370144279531e-08, "loss": 1.1162, "step": 3871 }, { "epoch": 2.930558183538316, "grad_norm": 2.043856382369995, "learning_rate": 2.8674495648147115e-08, "loss": 1.0377, "step": 3872 }, { "epoch": 2.9313150425733205, "grad_norm": 2.230227470397949, "learning_rate": 2.809171786243685e-08, "loss": 1.0775, "step": 3873 }, { "epoch": 2.9320719016083254, "grad_norm": 1.8375619649887085, "learning_rate": 2.7515368461316434e-08, "loss": 1.0217, "step": 3874 }, { "epoch": 2.9328287606433303, "grad_norm": 1.9141755104064941, "learning_rate": 2.694544781629039e-08, "loss": 1.038, "step": 3875 }, { "epoch": 2.9335856196783348, "grad_norm": 2.1602697372436523, "learning_rate": 2.6381956294720323e-08, "loss": 1.0714, "step": 3876 }, { "epoch": 2.9343424787133396, "grad_norm": 1.923949956893921, "learning_rate": 2.5824894259825987e-08, "loss": 1.0561, "step": 3877 }, { "epoch": 2.9350993377483445, "grad_norm": 2.1230146884918213, "learning_rate": 2.5274262070678672e-08, "loss": 1.0689, "step": 3878 }, { "epoch": 2.935856196783349, "grad_norm": 2.0640814304351807, "learning_rate": 2.4730060082210033e-08, "loss": 1.0851, "step": 3879 }, { "epoch": 2.936613055818354, "grad_norm": 1.8468024730682373, "learning_rate": 2.4192288645203268e-08, "loss": 1.0588, "step": 3880 }, { "epoch": 2.9373699148533587, "grad_norm": 2.06715726852417, "learning_rate": 2.3660948106297502e-08, "loss": 1.04, "step": 3881 }, { "epoch": 2.938126773888363, "grad_norm": 2.129422426223755, "learning_rate": 2.313603880798671e-08, "loss": 1.0935, "step": 3882 }, { "epoch": 2.938883632923368, "grad_norm": 1.9994871616363525, "learning_rate": 2.2617561088619707e-08, "loss": 1.0823, "step": 3883 }, { "epoch": 2.9396404919583725, "grad_norm": 1.9387072324752808, "learning_rate": 2.2105515282399045e-08, "loss": 1.0506, "step": 3884 }, { "epoch": 2.9403973509933774, "grad_norm": 1.9773590564727783, "learning_rate": 2.1599901719382117e-08, "loss": 1.0713, "step": 3885 }, { "epoch": 2.9411542100283823, "grad_norm": 1.9165699481964111, "learning_rate": 2.110072072547893e-08, "loss": 1.0889, "step": 3886 }, { "epoch": 2.941911069063387, "grad_norm": 1.9767038822174072, "learning_rate": 2.060797262245434e-08, "loss": 1.1121, "step": 3887 }, { "epoch": 2.9426679280983916, "grad_norm": 1.9442821741104126, "learning_rate": 2.012165772792693e-08, "loss": 1.0852, "step": 3888 }, { "epoch": 2.9434247871333965, "grad_norm": 1.9988024234771729, "learning_rate": 1.96417763553668e-08, "loss": 1.0733, "step": 3889 }, { "epoch": 2.944181646168401, "grad_norm": 2.0853540897369385, "learning_rate": 1.91683288141e-08, "loss": 1.0869, "step": 3890 }, { "epoch": 2.944938505203406, "grad_norm": 1.8882020711898804, "learning_rate": 1.8701315409300757e-08, "loss": 1.0716, "step": 3891 }, { "epoch": 2.9456953642384107, "grad_norm": 1.9533286094665527, "learning_rate": 1.8240736442000363e-08, "loss": 1.0977, "step": 3892 }, { "epoch": 2.946452223273415, "grad_norm": 2.2869935035705566, "learning_rate": 1.7786592209081624e-08, "loss": 1.036, "step": 3893 }, { "epoch": 2.94720908230842, "grad_norm": 1.8621643781661987, "learning_rate": 1.733888300327774e-08, "loss": 1.0655, "step": 3894 }, { "epoch": 2.947965941343425, "grad_norm": 2.069187641143799, "learning_rate": 1.689760911317565e-08, "loss": 1.0701, "step": 3895 }, { "epoch": 2.9487228003784294, "grad_norm": 2.112271547317505, "learning_rate": 1.64627708232138e-08, "loss": 1.0697, "step": 3896 }, { "epoch": 2.9494796594134343, "grad_norm": 2.0788121223449707, "learning_rate": 1.6034368413683266e-08, "loss": 1.0746, "step": 3897 }, { "epoch": 2.950236518448439, "grad_norm": 2.0578696727752686, "learning_rate": 1.56124021607244e-08, "loss": 1.0702, "step": 3898 }, { "epoch": 2.9509933774834436, "grad_norm": 2.171917676925659, "learning_rate": 1.519687233633019e-08, "loss": 1.0755, "step": 3899 }, { "epoch": 2.9517502365184485, "grad_norm": 1.9235490560531616, "learning_rate": 1.4787779208345125e-08, "loss": 1.0209, "step": 3900 }, { "epoch": 2.952507095553453, "grad_norm": 2.046241283416748, "learning_rate": 1.4385123040465213e-08, "loss": 1.0691, "step": 3901 }, { "epoch": 2.953263954588458, "grad_norm": 2.0865299701690674, "learning_rate": 1.398890409223575e-08, "loss": 1.0399, "step": 3902 }, { "epoch": 2.9540208136234627, "grad_norm": 2.0731747150421143, "learning_rate": 1.3599122619053542e-08, "loss": 1.0499, "step": 3903 }, { "epoch": 2.9547776726584676, "grad_norm": 2.198157548904419, "learning_rate": 1.32157788721658e-08, "loss": 1.0247, "step": 3904 }, { "epoch": 2.955534531693472, "grad_norm": 2.0734500885009766, "learning_rate": 1.2838873098669024e-08, "loss": 1.0417, "step": 3905 }, { "epoch": 2.956291390728477, "grad_norm": 1.8891007900238037, "learning_rate": 1.2468405541513447e-08, "loss": 1.0541, "step": 3906 }, { "epoch": 2.9570482497634814, "grad_norm": 1.9352359771728516, "learning_rate": 1.210437643949415e-08, "loss": 1.0932, "step": 3907 }, { "epoch": 2.9578051087984862, "grad_norm": 1.9413546323776245, "learning_rate": 1.1746786027259944e-08, "loss": 1.082, "step": 3908 }, { "epoch": 2.958561967833491, "grad_norm": 2.091618299484253, "learning_rate": 1.1395634535308943e-08, "loss": 1.0216, "step": 3909 }, { "epoch": 2.959318826868496, "grad_norm": 2.132253408432007, "learning_rate": 1.1050922189986316e-08, "loss": 1.0842, "step": 3910 }, { "epoch": 2.9600756859035005, "grad_norm": 2.1053178310394287, "learning_rate": 1.0712649213489865e-08, "loss": 1.0316, "step": 3911 }, { "epoch": 2.9608325449385053, "grad_norm": 2.1205570697784424, "learning_rate": 1.0380815823864458e-08, "loss": 1.065, "step": 3912 }, { "epoch": 2.96158940397351, "grad_norm": 2.1178319454193115, "learning_rate": 1.0055422235004254e-08, "loss": 1.0666, "step": 3913 }, { "epoch": 2.9623462630085147, "grad_norm": 2.021894693374634, "learning_rate": 9.736468656653818e-09, "loss": 1.0567, "step": 3914 }, { "epoch": 2.9631031220435196, "grad_norm": 1.921276569366455, "learning_rate": 9.423955294405891e-09, "loss": 1.093, "step": 3915 }, { "epoch": 2.963859981078524, "grad_norm": 2.062957286834717, "learning_rate": 9.117882349702507e-09, "loss": 1.0449, "step": 3916 }, { "epoch": 2.964616840113529, "grad_norm": 2.258112668991089, "learning_rate": 8.818250019831662e-09, "loss": 1.0587, "step": 3917 }, { "epoch": 2.9653736991485333, "grad_norm": 1.8993000984191895, "learning_rate": 8.52505849793286e-09, "loss": 1.0402, "step": 3918 }, { "epoch": 2.966130558183538, "grad_norm": 1.8386201858520508, "learning_rate": 8.23830797299268e-09, "loss": 1.1062, "step": 3919 }, { "epoch": 2.966887417218543, "grad_norm": 2.060410976409912, "learning_rate": 7.957998629846991e-09, "loss": 1.066, "step": 3920 }, { "epoch": 2.967644276253548, "grad_norm": 2.098123073577881, "learning_rate": 7.684130649177623e-09, "loss": 1.0881, "step": 3921 }, { "epoch": 2.9684011352885524, "grad_norm": 2.2169816493988037, "learning_rate": 7.416704207515695e-09, "loss": 1.0661, "step": 3922 }, { "epoch": 2.9691579943235573, "grad_norm": 2.0316176414489746, "learning_rate": 7.155719477241619e-09, "loss": 1.0737, "step": 3923 }, { "epoch": 2.9699148533585618, "grad_norm": 1.8836135864257812, "learning_rate": 6.901176626581769e-09, "loss": 1.0435, "step": 3924 }, { "epoch": 2.9706717123935666, "grad_norm": 2.030869960784912, "learning_rate": 6.653075819609588e-09, "loss": 1.0358, "step": 3925 }, { "epoch": 2.9714285714285715, "grad_norm": 2.1759679317474365, "learning_rate": 6.411417216247812e-09, "loss": 1.066, "step": 3926 }, { "epoch": 2.9721854304635764, "grad_norm": 2.092773199081421, "learning_rate": 6.176200972265136e-09, "loss": 1.031, "step": 3927 }, { "epoch": 2.972942289498581, "grad_norm": 2.3539814949035645, "learning_rate": 5.947427239279547e-09, "loss": 1.1136, "step": 3928 }, { "epoch": 2.9736991485335857, "grad_norm": 2.3484017848968506, "learning_rate": 5.725096164753884e-09, "loss": 1.0145, "step": 3929 }, { "epoch": 2.97445600756859, "grad_norm": 1.9310166835784912, "learning_rate": 5.509207892001385e-09, "loss": 1.0231, "step": 3930 }, { "epoch": 2.975212866603595, "grad_norm": 1.9161075353622437, "learning_rate": 5.299762560177922e-09, "loss": 1.0041, "step": 3931 }, { "epoch": 2.9759697256386, "grad_norm": 2.0112030506134033, "learning_rate": 5.096760304289763e-09, "loss": 1.1227, "step": 3932 }, { "epoch": 2.9767265846736044, "grad_norm": 2.0244789123535156, "learning_rate": 4.900201255189143e-09, "loss": 1.0922, "step": 3933 }, { "epoch": 2.9774834437086093, "grad_norm": 2.514658212661743, "learning_rate": 4.710085539575363e-09, "loss": 1.054, "step": 3934 }, { "epoch": 2.9782403027436137, "grad_norm": 1.9655340909957886, "learning_rate": 4.526413279993689e-09, "loss": 1.0453, "step": 3935 }, { "epoch": 2.9789971617786186, "grad_norm": 1.988950490951538, "learning_rate": 4.349184594836453e-09, "loss": 1.112, "step": 3936 }, { "epoch": 2.9797540208136235, "grad_norm": 2.0003416538238525, "learning_rate": 4.178399598341953e-09, "loss": 1.0888, "step": 3937 }, { "epoch": 2.9805108798486284, "grad_norm": 1.8892840147018433, "learning_rate": 4.014058400597776e-09, "loss": 1.0316, "step": 3938 }, { "epoch": 2.981267738883633, "grad_norm": 1.8778574466705322, "learning_rate": 3.856161107533029e-09, "loss": 1.0264, "step": 3939 }, { "epoch": 2.9820245979186377, "grad_norm": 1.9889436960220337, "learning_rate": 3.70470782092722e-09, "loss": 1.0775, "step": 3940 }, { "epoch": 2.982781456953642, "grad_norm": 1.9453630447387695, "learning_rate": 3.55969863840471e-09, "loss": 1.0661, "step": 3941 }, { "epoch": 2.983538315988647, "grad_norm": 2.1750118732452393, "learning_rate": 3.421133653436929e-09, "loss": 1.0582, "step": 3942 }, { "epoch": 2.984295175023652, "grad_norm": 1.9517415761947632, "learning_rate": 3.289012955339048e-09, "loss": 1.061, "step": 3943 }, { "epoch": 2.985052034058657, "grad_norm": 1.9353458881378174, "learning_rate": 3.16333662927553e-09, "loss": 1.0533, "step": 3944 }, { "epoch": 2.9858088930936613, "grad_norm": 2.1572377681732178, "learning_rate": 3.044104756254578e-09, "loss": 1.0871, "step": 3945 }, { "epoch": 2.986565752128666, "grad_norm": 1.9636902809143066, "learning_rate": 2.9313174131325764e-09, "loss": 1.0751, "step": 3946 }, { "epoch": 2.9873226111636706, "grad_norm": 1.9330499172210693, "learning_rate": 2.8249746726085392e-09, "loss": 1.0858, "step": 3947 }, { "epoch": 2.9880794701986755, "grad_norm": 1.8204699754714966, "learning_rate": 2.7250766032307735e-09, "loss": 1.034, "step": 3948 }, { "epoch": 2.9888363292336804, "grad_norm": 1.9643014669418335, "learning_rate": 2.6316232693913253e-09, "loss": 1.062, "step": 3949 }, { "epoch": 2.989593188268685, "grad_norm": 2.25469708442688, "learning_rate": 2.544614731329312e-09, "loss": 1.0581, "step": 3950 }, { "epoch": 2.9903500473036897, "grad_norm": 2.0587730407714844, "learning_rate": 2.464051045128703e-09, "loss": 1.0528, "step": 3951 }, { "epoch": 2.9911069063386946, "grad_norm": 1.9544748067855835, "learning_rate": 2.389932262720538e-09, "loss": 1.0655, "step": 3952 }, { "epoch": 2.991863765373699, "grad_norm": 2.1084601879119873, "learning_rate": 2.3222584318784854e-09, "loss": 1.1346, "step": 3953 }, { "epoch": 2.992620624408704, "grad_norm": 2.051255226135254, "learning_rate": 2.261029596226618e-09, "loss": 1.1014, "step": 3954 }, { "epoch": 2.993377483443709, "grad_norm": 2.079298734664917, "learning_rate": 2.206245795231637e-09, "loss": 1.092, "step": 3955 }, { "epoch": 2.9941343424787132, "grad_norm": 1.9083516597747803, "learning_rate": 2.157907064203985e-09, "loss": 1.0385, "step": 3956 }, { "epoch": 2.994891201513718, "grad_norm": 2.315964460372925, "learning_rate": 2.1160134343056167e-09, "loss": 1.0794, "step": 3957 }, { "epoch": 2.9956480605487226, "grad_norm": 2.072871685028076, "learning_rate": 2.080564932537786e-09, "loss": 1.0603, "step": 3958 }, { "epoch": 2.9964049195837275, "grad_norm": 1.996877908706665, "learning_rate": 2.0515615817510374e-09, "loss": 1.066, "step": 3959 }, { "epoch": 2.9971617786187323, "grad_norm": 2.1000583171844482, "learning_rate": 2.0290034006407686e-09, "loss": 1.0554, "step": 3960 }, { "epoch": 2.9979186376537372, "grad_norm": 2.0836434364318848, "learning_rate": 2.0128904037472256e-09, "loss": 1.0769, "step": 3961 }, { "epoch": 2.9986754966887417, "grad_norm": 1.999711275100708, "learning_rate": 2.0032226014555062e-09, "loss": 1.0731, "step": 3962 }, { "epoch": 2.9994323557237466, "grad_norm": 2.6111867427825928, "learning_rate": 2e-09, "loss": 1.1002, "step": 3963 } ], "logging_steps": 1.0, "max_steps": 3963, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.729010317829918e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }